xen-netfront.c 67 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720
  1. /*
  2. * Virtual network driver for conversing with remote driver backends.
  3. *
  4. * Copyright (c) 2002-2005, K A Fraser
  5. * Copyright (c) 2005, XenSource Ltd
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License version 2
  9. * as published by the Free Software Foundation; or, when distributed
  10. * separately from the Linux kernel or incorporated into other
  11. * software packages, subject to the following license:
  12. *
  13. * Permission is hereby granted, free of charge, to any person obtaining a copy
  14. * of this source file (the "Software"), to deal in the Software without
  15. * restriction, including without limitation the rights to use, copy, modify,
  16. * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  17. * and to permit persons to whom the Software is furnished to do so, subject to
  18. * the following conditions:
  19. *
  20. * The above copyright notice and this permission notice shall be included in
  21. * all copies or substantial portions of the Software.
  22. *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  28. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  29. * IN THE SOFTWARE.
  30. */
  31. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  32. #include <linux/module.h>
  33. #include <linux/kernel.h>
  34. #include <linux/netdevice.h>
  35. #include <linux/etherdevice.h>
  36. #include <linux/skbuff.h>
  37. #include <linux/ethtool.h>
  38. #include <linux/if_ether.h>
  39. #include <net/tcp.h>
  40. #include <linux/udp.h>
  41. #include <linux/moduleparam.h>
  42. #include <linux/mm.h>
  43. #include <linux/slab.h>
  44. #include <net/ip.h>
  45. #include <linux/bpf.h>
  46. #include <net/page_pool/types.h>
  47. #include <linux/bpf_trace.h>
  48. #include <xen/xen.h>
  49. #include <xen/xenbus.h>
  50. #include <xen/events.h>
  51. #include <xen/page.h>
  52. #include <xen/platform_pci.h>
  53. #include <xen/grant_table.h>
  54. #include <xen/interface/io/netif.h>
  55. #include <xen/interface/memory.h>
  56. #include <xen/interface/grant_table.h>
  57. /* Module parameters */
  58. #define MAX_QUEUES_DEFAULT 8
  59. static unsigned int xennet_max_queues;
  60. module_param_named(max_queues, xennet_max_queues, uint, 0644);
  61. MODULE_PARM_DESC(max_queues,
  62. "Maximum number of queues per virtual interface");
  63. static bool __read_mostly xennet_trusted = true;
  64. module_param_named(trusted, xennet_trusted, bool, 0644);
  65. MODULE_PARM_DESC(trusted, "Is the backend trusted");
  66. #define XENNET_TIMEOUT (5 * HZ)
  67. static const struct ethtool_ops xennet_ethtool_ops;
  68. struct netfront_cb {
  69. int pull_to;
  70. };
  71. #define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb))
  72. #define RX_COPY_THRESHOLD 256
  73. #define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)
  74. #define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, XEN_PAGE_SIZE)
  75. /* Minimum number of Rx slots (includes slot for GSO metadata). */
  76. #define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1)
  77. /* Queue name is interface name with "-qNNN" appended */
  78. #define QUEUE_NAME_SIZE (IFNAMSIZ + 6)
  79. /* IRQ name is queue name with "-tx" or "-rx" appended */
  80. #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
  81. static DECLARE_WAIT_QUEUE_HEAD(module_wq);
  82. struct netfront_stats {
  83. u64_stats_t packets;
  84. u64_stats_t bytes;
  85. struct u64_stats_sync syncp;
  86. };
  87. struct netfront_info;
  88. struct netfront_queue {
  89. unsigned int id; /* Queue ID, 0-based */
  90. char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
  91. struct netfront_info *info;
  92. struct bpf_prog __rcu *xdp_prog;
  93. struct napi_struct napi;
  94. /* Split event channels support, tx_* == rx_* when using
  95. * single event channel.
  96. */
  97. unsigned int tx_evtchn, rx_evtchn;
  98. unsigned int tx_irq, rx_irq;
  99. /* Only used when split event channels support is enabled */
  100. char tx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-tx */
  101. char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
  102. spinlock_t tx_lock;
  103. struct xen_netif_tx_front_ring tx;
  104. int tx_ring_ref;
  105. /*
  106. * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
  107. * are linked from tx_skb_freelist through tx_link.
  108. */
  109. struct sk_buff *tx_skbs[NET_TX_RING_SIZE];
  110. unsigned short tx_link[NET_TX_RING_SIZE];
  111. #define TX_LINK_NONE 0xffff
  112. #define TX_PENDING 0xfffe
  113. grant_ref_t gref_tx_head;
  114. grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
  115. struct page *grant_tx_page[NET_TX_RING_SIZE];
  116. unsigned tx_skb_freelist;
  117. unsigned int tx_pend_queue;
  118. spinlock_t rx_lock ____cacheline_aligned_in_smp;
  119. struct xen_netif_rx_front_ring rx;
  120. int rx_ring_ref;
  121. struct timer_list rx_refill_timer;
  122. struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
  123. grant_ref_t gref_rx_head;
  124. grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
  125. unsigned int rx_rsp_unconsumed;
  126. spinlock_t rx_cons_lock;
  127. struct page_pool *page_pool;
  128. struct xdp_rxq_info xdp_rxq;
  129. };
  130. struct netfront_info {
  131. struct list_head list;
  132. struct net_device *netdev;
  133. struct xenbus_device *xbdev;
  134. /* Multi-queue support */
  135. struct netfront_queue *queues;
  136. /* Statistics */
  137. struct netfront_stats __percpu *rx_stats;
  138. struct netfront_stats __percpu *tx_stats;
  139. /* XDP state */
  140. bool netback_has_xdp_headroom;
  141. bool netfront_xdp_enabled;
  142. /* Is device behaving sane? */
  143. bool broken;
  144. /* Should skbs be bounced into a zeroed buffer? */
  145. bool bounce;
  146. atomic_t rx_gso_checksum_fixup;
  147. };
  148. struct netfront_rx_info {
  149. struct xen_netif_rx_response rx;
  150. struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
  151. };
  /*
   * Helpers for acquiring and freeing slots in tx_skbs[].
   *
   * add_id_to_list() pushes @id onto the front of the index-linked list
   * whose head is *@head and whose links are stored in @list.
   */
  static void add_id_to_list(unsigned *head, unsigned short *list,
  			   unsigned short id)
  {
  	unsigned short *link = &list[id];

  	/* The new entry points at the old head, then becomes the head. */
  	*link = *head;
  	*head = id;
  }
  161. static unsigned short get_id_from_list(unsigned *head, unsigned short *list)
  162. {
  163. unsigned int id = *head;
  164. if (id != TX_LINK_NONE) {
  165. *head = list[id];
  166. list[id] = TX_LINK_NONE;
  167. }
  168. return id;
  169. }
  170. static int xennet_rxidx(RING_IDX idx)
  171. {
  172. return idx & (NET_RX_RING_SIZE - 1);
  173. }
  174. static struct sk_buff *xennet_get_rx_skb(struct netfront_queue *queue,
  175. RING_IDX ri)
  176. {
  177. int i = xennet_rxidx(ri);
  178. struct sk_buff *skb = queue->rx_skbs[i];
  179. queue->rx_skbs[i] = NULL;
  180. return skb;
  181. }
  182. static grant_ref_t xennet_get_rx_ref(struct netfront_queue *queue,
  183. RING_IDX ri)
  184. {
  185. int i = xennet_rxidx(ri);
  186. grant_ref_t ref = queue->grant_rx_ref[i];
  187. queue->grant_rx_ref[i] = INVALID_GRANT_REF;
  188. return ref;
  189. }
  #ifdef CONFIG_SYSFS
  /* Forward declaration of the sysfs attribute group. */
  static const struct attribute_group xennet_dev_group;
  #endif
  193. static bool xennet_can_sg(struct net_device *dev)
  194. {
  195. return dev->features & NETIF_F_SG;
  196. }
  197. static void rx_refill_timeout(struct timer_list *t)
  198. {
  199. struct netfront_queue *queue = timer_container_of(queue, t,
  200. rx_refill_timer);
  201. napi_schedule(&queue->napi);
  202. }
  203. static int netfront_tx_slot_available(struct netfront_queue *queue)
  204. {
  205. return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
  206. (NET_TX_RING_SIZE - XEN_NETIF_NR_SLOTS_MIN - 1);
  207. }
  208. static void xennet_maybe_wake_tx(struct netfront_queue *queue)
  209. {
  210. struct net_device *dev = queue->info->netdev;
  211. struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, queue->id);
  212. if (unlikely(netif_tx_queue_stopped(dev_queue)) &&
  213. netfront_tx_slot_available(queue) &&
  214. likely(netif_running(dev)))
  215. netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
  216. }
  217. static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
  218. {
  219. struct sk_buff *skb;
  220. struct page *page;
  221. skb = __netdev_alloc_skb(queue->info->netdev,
  222. RX_COPY_THRESHOLD + NET_IP_ALIGN,
  223. GFP_ATOMIC | __GFP_NOWARN);
  224. if (unlikely(!skb))
  225. return NULL;
  226. page = page_pool_alloc_pages(queue->page_pool,
  227. GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO);
  228. if (unlikely(!page)) {
  229. kfree_skb(skb);
  230. return NULL;
  231. }
  232. skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
  233. skb_mark_for_recycle(skb);
  234. /* Align ip header to a 16 bytes boundary */
  235. skb_reserve(skb, NET_IP_ALIGN);
  236. skb->dev = queue->info->netdev;
  237. return skb;
  238. }
  239. static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
  240. {
  241. RING_IDX req_prod = queue->rx.req_prod_pvt;
  242. int notify;
  243. int err = 0;
  244. if (unlikely(!netif_carrier_ok(queue->info->netdev)))
  245. return;
  246. for (req_prod = queue->rx.req_prod_pvt;
  247. req_prod - queue->rx.rsp_cons < NET_RX_RING_SIZE;
  248. req_prod++) {
  249. struct sk_buff *skb;
  250. unsigned short id;
  251. grant_ref_t ref;
  252. struct page *page;
  253. struct xen_netif_rx_request *req;
  254. skb = xennet_alloc_one_rx_buffer(queue);
  255. if (!skb) {
  256. err = -ENOMEM;
  257. break;
  258. }
  259. id = xennet_rxidx(req_prod);
  260. BUG_ON(queue->rx_skbs[id]);
  261. queue->rx_skbs[id] = skb;
  262. ref = gnttab_claim_grant_reference(&queue->gref_rx_head);
  263. WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
  264. queue->grant_rx_ref[id] = ref;
  265. page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
  266. req = RING_GET_REQUEST(&queue->rx, req_prod);
  267. gnttab_page_grant_foreign_access_ref_one(ref,
  268. queue->info->xbdev->otherend_id,
  269. page,
  270. 0);
  271. req->id = id;
  272. req->gref = ref;
  273. }
  274. queue->rx.req_prod_pvt = req_prod;
  275. /* Try again later if there are not enough requests or skb allocation
  276. * failed.
  277. * Enough requests is quantified as the sum of newly created slots and
  278. * the unconsumed slots at the backend.
  279. */
  280. if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN ||
  281. unlikely(err)) {
  282. mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10));
  283. return;
  284. }
  285. RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify);
  286. if (notify)
  287. notify_remote_via_irq(queue->rx_irq);
  288. }
  289. static int xennet_open(struct net_device *dev)
  290. {
  291. struct netfront_info *np = netdev_priv(dev);
  292. unsigned int num_queues = dev->real_num_tx_queues;
  293. unsigned int i = 0;
  294. struct netfront_queue *queue = NULL;
  295. if (!np->queues || np->broken)
  296. return -ENODEV;
  297. for (i = 0; i < num_queues; ++i) {
  298. queue = &np->queues[i];
  299. napi_enable(&queue->napi);
  300. spin_lock_bh(&queue->rx_lock);
  301. if (netif_carrier_ok(dev)) {
  302. xennet_alloc_rx_buffers(queue);
  303. queue->rx.sring->rsp_event = queue->rx.rsp_cons + 1;
  304. if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
  305. napi_schedule(&queue->napi);
  306. }
  307. spin_unlock_bh(&queue->rx_lock);
  308. }
  309. netif_tx_start_all_queues(dev);
  310. return 0;
  311. }
  312. static bool xennet_tx_buf_gc(struct netfront_queue *queue)
  313. {
  314. RING_IDX cons, prod;
  315. unsigned short id;
  316. struct sk_buff *skb;
  317. bool more_to_do;
  318. bool work_done = false;
  319. const struct device *dev = &queue->info->netdev->dev;
  320. BUG_ON(!netif_carrier_ok(queue->info->netdev));
  321. do {
  322. prod = queue->tx.sring->rsp_prod;
  323. if (RING_RESPONSE_PROD_OVERFLOW(&queue->tx, prod)) {
  324. dev_alert(dev, "Illegal number of responses %u\n",
  325. prod - queue->tx.rsp_cons);
  326. goto err;
  327. }
  328. rmb(); /* Ensure we see responses up to 'rp'. */
  329. for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
  330. struct xen_netif_tx_response txrsp;
  331. work_done = true;
  332. RING_COPY_RESPONSE(&queue->tx, cons, &txrsp);
  333. if (txrsp.status == XEN_NETIF_RSP_NULL)
  334. continue;
  335. id = txrsp.id;
  336. if (id >= RING_SIZE(&queue->tx)) {
  337. dev_alert(dev,
  338. "Response has incorrect id (%u)\n",
  339. id);
  340. goto err;
  341. }
  342. if (queue->tx_link[id] != TX_PENDING) {
  343. dev_alert(dev,
  344. "Response for inactive request\n");
  345. goto err;
  346. }
  347. queue->tx_link[id] = TX_LINK_NONE;
  348. skb = queue->tx_skbs[id];
  349. queue->tx_skbs[id] = NULL;
  350. if (unlikely(!gnttab_end_foreign_access_ref(
  351. queue->grant_tx_ref[id]))) {
  352. dev_alert(dev,
  353. "Grant still in use by backend domain\n");
  354. goto err;
  355. }
  356. gnttab_release_grant_reference(
  357. &queue->gref_tx_head, queue->grant_tx_ref[id]);
  358. queue->grant_tx_ref[id] = INVALID_GRANT_REF;
  359. queue->grant_tx_page[id] = NULL;
  360. add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, id);
  361. dev_kfree_skb_irq(skb);
  362. }
  363. queue->tx.rsp_cons = prod;
  364. RING_FINAL_CHECK_FOR_RESPONSES(&queue->tx, more_to_do);
  365. } while (more_to_do);
  366. xennet_maybe_wake_tx(queue);
  367. return work_done;
  368. err:
  369. queue->info->broken = true;
  370. dev_alert(dev, "Disabled for further use\n");
  371. return work_done;
  372. }
  373. struct xennet_gnttab_make_txreq {
  374. struct netfront_queue *queue;
  375. struct sk_buff *skb;
  376. struct page *page;
  377. struct xen_netif_tx_request *tx; /* Last request on ring page */
  378. struct xen_netif_tx_request tx_local; /* Last request local copy*/
  379. unsigned int size;
  380. };
  381. static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
  382. unsigned int len, void *data)
  383. {
  384. struct xennet_gnttab_make_txreq *info = data;
  385. unsigned int id;
  386. struct xen_netif_tx_request *tx;
  387. grant_ref_t ref;
  388. /* convenient aliases */
  389. struct page *page = info->page;
  390. struct netfront_queue *queue = info->queue;
  391. struct sk_buff *skb = info->skb;
  392. id = get_id_from_list(&queue->tx_skb_freelist, queue->tx_link);
  393. tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
  394. ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
  395. WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
  396. gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
  397. gfn, GNTMAP_readonly);
  398. queue->tx_skbs[id] = skb;
  399. queue->grant_tx_page[id] = page;
  400. queue->grant_tx_ref[id] = ref;
  401. info->tx_local.id = id;
  402. info->tx_local.gref = ref;
  403. info->tx_local.offset = offset;
  404. info->tx_local.size = len;
  405. info->tx_local.flags = 0;
  406. *tx = info->tx_local;
  407. /*
  408. * Put the request in the pending queue, it will be set to be pending
  409. * when the producer index is about to be raised.
  410. */
  411. add_id_to_list(&queue->tx_pend_queue, queue->tx_link, id);
  412. info->tx = tx;
  413. info->size += info->tx_local.size;
  414. }
  415. static struct xen_netif_tx_request *xennet_make_first_txreq(
  416. struct xennet_gnttab_make_txreq *info,
  417. unsigned int offset, unsigned int len)
  418. {
  419. info->size = 0;
  420. gnttab_for_one_grant(info->page, offset, len, xennet_tx_setup_grant, info);
  421. return info->tx;
  422. }
  423. static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset,
  424. unsigned int len, void *data)
  425. {
  426. struct xennet_gnttab_make_txreq *info = data;
  427. info->tx->flags |= XEN_NETTXF_more_data;
  428. skb_get(info->skb);
  429. xennet_tx_setup_grant(gfn, offset, len, data);
  430. }
  431. static void xennet_make_txreqs(
  432. struct xennet_gnttab_make_txreq *info,
  433. struct page *page,
  434. unsigned int offset, unsigned int len)
  435. {
  436. /* Skip unused frames from start of page */
  437. page += offset >> PAGE_SHIFT;
  438. offset &= ~PAGE_MASK;
  439. while (len) {
  440. info->page = page;
  441. info->size = 0;
  442. gnttab_foreach_grant_in_range(page, offset, len,
  443. xennet_make_one_txreq,
  444. info);
  445. page++;
  446. offset = 0;
  447. len -= info->size;
  448. }
  449. }
  450. /*
  451. * Count how many ring slots are required to send this skb. Each frag
  452. * might be a compound page.
  453. */
  454. static int xennet_count_skb_slots(struct sk_buff *skb)
  455. {
  456. int i, frags = skb_shinfo(skb)->nr_frags;
  457. int slots;
  458. slots = gnttab_count_grant(offset_in_page(skb->data),
  459. skb_headlen(skb));
  460. for (i = 0; i < frags; i++) {
  461. skb_frag_t *frag = skb_shinfo(skb)->frags + i;
  462. unsigned long size = skb_frag_size(frag);
  463. unsigned long offset = skb_frag_off(frag);
  464. /* Skip unused frames from start of page */
  465. offset &= ~PAGE_MASK;
  466. slots += gnttab_count_grant(offset, size);
  467. }
  468. return slots;
  469. }
  470. static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
  471. struct net_device *sb_dev)
  472. {
  473. unsigned int num_queues = dev->real_num_tx_queues;
  474. u32 hash;
  475. u16 queue_idx;
  476. /* First, check if there is only one queue */
  477. if (num_queues == 1) {
  478. queue_idx = 0;
  479. } else {
  480. hash = skb_get_hash(skb);
  481. queue_idx = hash % num_queues;
  482. }
  483. return queue_idx;
  484. }
  485. static void xennet_mark_tx_pending(struct netfront_queue *queue)
  486. {
  487. unsigned int i;
  488. while ((i = get_id_from_list(&queue->tx_pend_queue, queue->tx_link)) !=
  489. TX_LINK_NONE)
  490. queue->tx_link[i] = TX_PENDING;
  491. }
  492. static int xennet_xdp_xmit_one(struct net_device *dev,
  493. struct netfront_queue *queue,
  494. struct xdp_frame *xdpf)
  495. {
  496. struct netfront_info *np = netdev_priv(dev);
  497. struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
  498. struct xennet_gnttab_make_txreq info = {
  499. .queue = queue,
  500. .skb = NULL,
  501. .page = virt_to_page(xdpf->data),
  502. };
  503. int notify;
  504. xennet_make_first_txreq(&info,
  505. offset_in_page(xdpf->data),
  506. xdpf->len);
  507. xennet_mark_tx_pending(queue);
  508. RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
  509. if (notify)
  510. notify_remote_via_irq(queue->tx_irq);
  511. u64_stats_update_begin(&tx_stats->syncp);
  512. u64_stats_add(&tx_stats->bytes, xdpf->len);
  513. u64_stats_inc(&tx_stats->packets);
  514. u64_stats_update_end(&tx_stats->syncp);
  515. return 0;
  516. }
  517. static int xennet_xdp_xmit(struct net_device *dev, int n,
  518. struct xdp_frame **frames, u32 flags)
  519. {
  520. unsigned int num_queues = dev->real_num_tx_queues;
  521. struct netfront_info *np = netdev_priv(dev);
  522. struct netfront_queue *queue = NULL;
  523. unsigned long irq_flags;
  524. int nxmit = 0;
  525. int i;
  526. if (unlikely(np->broken))
  527. return -ENODEV;
  528. if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
  529. return -EINVAL;
  530. queue = &np->queues[smp_processor_id() % num_queues];
  531. spin_lock_irqsave(&queue->tx_lock, irq_flags);
  532. for (i = 0; i < n; i++) {
  533. struct xdp_frame *xdpf = frames[i];
  534. if (!xdpf)
  535. continue;
  536. if (xennet_xdp_xmit_one(dev, queue, xdpf))
  537. break;
  538. nxmit++;
  539. }
  540. spin_unlock_irqrestore(&queue->tx_lock, irq_flags);
  541. return nxmit;
  542. }
  543. static struct sk_buff *bounce_skb(const struct sk_buff *skb)
  544. {
  545. unsigned int headerlen = skb_headroom(skb);
  546. /* Align size to allocate full pages and avoid contiguous data leaks */
  547. unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len,
  548. XEN_PAGE_SIZE);
  549. struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO);
  550. if (!n)
  551. return NULL;
  552. if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) {
  553. WARN_ONCE(1, "misaligned skb allocated\n");
  554. kfree_skb(n);
  555. return NULL;
  556. }
  557. /* Set the data pointer */
  558. skb_reserve(n, headerlen);
  559. /* Set the tail pointer and length */
  560. skb_put(n, skb->len);
  561. BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
  562. skb_copy_header(n, skb);
  563. return n;
  564. }
  /* Number of XEN_PAGE_SIZE units in 65536 bytes, plus one. */
  #define MAX_XEN_SKB_FRAGS ((65536 / XEN_PAGE_SIZE) + 1)
  566. static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
  567. {
  568. struct netfront_info *np = netdev_priv(dev);
  569. struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
  570. struct xen_netif_tx_request *first_tx;
  571. unsigned int i;
  572. int notify;
  573. int slots;
  574. struct page *page;
  575. unsigned int offset;
  576. unsigned int len;
  577. unsigned long flags;
  578. struct netfront_queue *queue = NULL;
  579. struct xennet_gnttab_make_txreq info = { };
  580. unsigned int num_queues = dev->real_num_tx_queues;
  581. u16 queue_index;
  582. struct sk_buff *nskb;
  583. /* Drop the packet if no queues are set up */
  584. if (num_queues < 1)
  585. goto drop;
  586. if (unlikely(np->broken))
  587. goto drop;
  588. /* Determine which queue to transmit this SKB on */
  589. queue_index = skb_get_queue_mapping(skb);
  590. queue = &np->queues[queue_index];
  591. /* If skb->len is too big for wire format, drop skb and alert
  592. * user about misconfiguration.
  593. */
  594. if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) {
  595. net_alert_ratelimited(
  596. "xennet: skb->len = %u, too big for wire format\n",
  597. skb->len);
  598. goto drop;
  599. }
  600. slots = xennet_count_skb_slots(skb);
  601. if (unlikely(slots > MAX_XEN_SKB_FRAGS + 1)) {
  602. net_dbg_ratelimited("xennet: skb rides the rocket: %d slots, %d bytes\n",
  603. slots, skb->len);
  604. if (skb_linearize(skb))
  605. goto drop;
  606. }
  607. page = virt_to_page(skb->data);
  608. offset = offset_in_page(skb->data);
  609. /* The first req should be at least ETH_HLEN size or the packet will be
  610. * dropped by netback.
  611. *
  612. * If the backend is not trusted bounce all data to zeroed pages to
  613. * avoid exposing contiguous data on the granted page not belonging to
  614. * the skb.
  615. */
  616. if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
  617. nskb = bounce_skb(skb);
  618. if (!nskb)
  619. goto drop;
  620. dev_consume_skb_any(skb);
  621. skb = nskb;
  622. page = virt_to_page(skb->data);
  623. offset = offset_in_page(skb->data);
  624. }
  625. len = skb_headlen(skb);
  626. spin_lock_irqsave(&queue->tx_lock, flags);
  627. if (unlikely(!netif_carrier_ok(dev) ||
  628. (slots > 1 && !xennet_can_sg(dev)) ||
  629. netif_needs_gso(skb, netif_skb_features(skb)))) {
  630. spin_unlock_irqrestore(&queue->tx_lock, flags);
  631. goto drop;
  632. }
  633. /* First request for the linear area. */
  634. info.queue = queue;
  635. info.skb = skb;
  636. info.page = page;
  637. first_tx = xennet_make_first_txreq(&info, offset, len);
  638. offset += info.tx_local.size;
  639. if (offset == PAGE_SIZE) {
  640. page++;
  641. offset = 0;
  642. }
  643. len -= info.tx_local.size;
  644. if (skb->ip_summed == CHECKSUM_PARTIAL)
  645. /* local packet? */
  646. first_tx->flags |= XEN_NETTXF_csum_blank |
  647. XEN_NETTXF_data_validated;
  648. else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
  649. /* remote but checksummed. */
  650. first_tx->flags |= XEN_NETTXF_data_validated;
  651. /* Optional extra info after the first request. */
  652. if (skb_shinfo(skb)->gso_size) {
  653. struct xen_netif_extra_info *gso;
  654. gso = (struct xen_netif_extra_info *)
  655. RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
  656. first_tx->flags |= XEN_NETTXF_extra_info;
  657. gso->u.gso.size = skb_shinfo(skb)->gso_size;
  658. gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
  659. XEN_NETIF_GSO_TYPE_TCPV6 :
  660. XEN_NETIF_GSO_TYPE_TCPV4;
  661. gso->u.gso.pad = 0;
  662. gso->u.gso.features = 0;
  663. gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
  664. gso->flags = 0;
  665. }
  666. /* Requests for the rest of the linear area. */
  667. xennet_make_txreqs(&info, page, offset, len);
  668. /* Requests for all the frags. */
  669. for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  670. skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  671. xennet_make_txreqs(&info, skb_frag_page(frag),
  672. skb_frag_off(frag),
  673. skb_frag_size(frag));
  674. }
  675. /* First request has the packet length. */
  676. first_tx->size = skb->len;
  677. /* timestamp packet in software */
  678. skb_tx_timestamp(skb);
  679. xennet_mark_tx_pending(queue);
  680. RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
  681. if (notify)
  682. notify_remote_via_irq(queue->tx_irq);
  683. u64_stats_update_begin(&tx_stats->syncp);
  684. u64_stats_add(&tx_stats->bytes, skb->len);
  685. u64_stats_inc(&tx_stats->packets);
  686. u64_stats_update_end(&tx_stats->syncp);
  687. if (!netfront_tx_slot_available(queue))
  688. netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
  689. spin_unlock_irqrestore(&queue->tx_lock, flags);
  690. return NETDEV_TX_OK;
  691. drop:
  692. dev->stats.tx_dropped++;
  693. dev_kfree_skb_any(skb);
  694. return NETDEV_TX_OK;
  695. }
  696. static int xennet_close(struct net_device *dev)
  697. {
  698. struct netfront_info *np = netdev_priv(dev);
  699. unsigned int num_queues = np->queues ? dev->real_num_tx_queues : 0;
  700. unsigned int i;
  701. struct netfront_queue *queue;
  702. netif_tx_stop_all_queues(np->netdev);
  703. for (i = 0; i < num_queues; ++i) {
  704. queue = &np->queues[i];
  705. napi_disable(&queue->napi);
  706. }
  707. return 0;
  708. }
  709. static void xennet_destroy_queues(struct netfront_info *info)
  710. {
  711. unsigned int i;
  712. if (!info->queues)
  713. return;
  714. for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
  715. struct netfront_queue *queue = &info->queues[i];
  716. if (netif_running(info->netdev))
  717. napi_disable(&queue->napi);
  718. netif_napi_del(&queue->napi);
  719. }
  720. kfree(info->queues);
  721. info->queues = NULL;
  722. }
  723. static void xennet_uninit(struct net_device *dev)
  724. {
  725. struct netfront_info *np = netdev_priv(dev);
  726. xennet_destroy_queues(np);
  727. }
  728. static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val)
  729. {
  730. unsigned long flags;
  731. spin_lock_irqsave(&queue->rx_cons_lock, flags);
  732. queue->rx.rsp_cons = val;
  733. queue->rx_rsp_unconsumed = XEN_RING_NR_UNCONSUMED_RESPONSES(&queue->rx);
  734. spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
  735. }
  736. static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
  737. grant_ref_t ref)
  738. {
  739. int new = xennet_rxidx(queue->rx.req_prod_pvt);
  740. BUG_ON(queue->rx_skbs[new]);
  741. queue->rx_skbs[new] = skb;
  742. queue->grant_rx_ref[new] = ref;
  743. RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->id = new;
  744. RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->gref = ref;
  745. queue->rx.req_prod_pvt++;
  746. }
/*
 * Consume the chain of extra-info responses that follows the response at
 * queue->rx.rsp_cons.
 *
 * Each valid extra is stored in @extras at index (type - 1).  The skb and
 * grant reference attached to every extra slot are re-posted to the ring
 * through xennet_move_rx_slot().  @rp is the response producer index
 * bounding what may be read.
 *
 * Returns 0 on success, -EBADR if the responses run out before the chain
 * ends, or -EINVAL if an extra with an invalid type was seen.  In all cases
 * rsp_cons is advanced to the last slot examined.
 */
static int xennet_get_extras(struct netfront_queue *queue,
			     struct xen_netif_extra_info *extras,
			     RING_IDX rp)

{
	struct xen_netif_extra_info extra;
	struct device *dev = &queue->info->netdev->dev;
	RING_IDX cons = queue->rx.rsp_cons;
	int err = 0;

	do {
		struct sk_buff *skb;
		grant_ref_t ref;

		/* The next slot would be at or past the producer: chain is truncated. */
		if (unlikely(cons + 1 == rp)) {
			if (net_ratelimit())
				dev_warn(dev, "Missing extra info\n");
			err = -EBADR;
			break;
		}

		/* Work on a local copy of the ring entry. */
		RING_COPY_RESPONSE(&queue->rx, ++cons, &extra);

		if (unlikely(!extra.type ||
			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			if (net_ratelimit())
				dev_warn(dev, "Invalid extra type: %d\n",
					 extra.type);
			/* Keep walking the chain; the error is reported at the end. */
			err = -EINVAL;
		} else {
			extras[extra.type - 1] = extra;
		}

		/* Hand the buffer of this slot back to the ring. */
		skb = xennet_get_rx_skb(queue, cons);
		ref = xennet_get_rx_ref(queue, cons);
		xennet_move_rx_slot(queue, skb, ref);
	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

	xennet_set_rx_rsp_cons(queue, cons);
	return err;
}
  781. static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata,
  782. struct xen_netif_rx_response *rx, struct bpf_prog *prog,
  783. struct xdp_buff *xdp, bool *need_xdp_flush)
  784. {
  785. struct xdp_frame *xdpf;
  786. u32 len = rx->status;
  787. u32 act;
  788. int err;
  789. xdp_init_buff(xdp, XEN_PAGE_SIZE - XDP_PACKET_HEADROOM,
  790. &queue->xdp_rxq);
  791. xdp_prepare_buff(xdp, page_address(pdata), XDP_PACKET_HEADROOM,
  792. len, false);
  793. act = bpf_prog_run_xdp(prog, xdp);
  794. switch (act) {
  795. case XDP_TX:
  796. xdpf = xdp_convert_buff_to_frame(xdp);
  797. if (unlikely(!xdpf)) {
  798. trace_xdp_exception(queue->info->netdev, prog, act);
  799. break;
  800. }
  801. get_page(pdata);
  802. err = xennet_xdp_xmit(queue->info->netdev, 1, &xdpf, 0);
  803. if (unlikely(err <= 0)) {
  804. if (err < 0)
  805. trace_xdp_exception(queue->info->netdev, prog, act);
  806. xdp_return_frame_rx_napi(xdpf);
  807. }
  808. break;
  809. case XDP_REDIRECT:
  810. get_page(pdata);
  811. err = xdp_do_redirect(queue->info->netdev, xdp, prog);
  812. *need_xdp_flush = true;
  813. if (unlikely(err)) {
  814. trace_xdp_exception(queue->info->netdev, prog, act);
  815. xdp_return_buff(xdp);
  816. }
  817. break;
  818. case XDP_PASS:
  819. case XDP_DROP:
  820. break;
  821. case XDP_ABORTED:
  822. trace_xdp_exception(queue->info->netdev, prog, act);
  823. break;
  824. default:
  825. bpf_warn_invalid_xdp_action(queue->info->netdev, prog, act);
  826. }
  827. return act;
  828. }
/*
 * Gather every RX response that belongs to the packet starting at
 * queue->rx.rsp_cons.
 *
 * @rinfo->rx holds a copy of the first response; extras found are stored in
 * @rinfo->extras.  The skb of each accepted slot is appended to @list.
 * @rp bounds the responses that may be read.  *need_xdp_flush is passed on
 * to xennet_run_xdp().
 *
 * Returns 0 on success or a negative errno.  On error rsp_cons is moved
 * past the slots examined, except on the "grant still in use" path, which
 * marks the device broken and returns immediately.
 */
static int xennet_get_responses(struct netfront_queue *queue,
				struct netfront_rx_info *rinfo, RING_IDX rp,
				struct sk_buff_head *list,
				bool *need_xdp_flush)
{
	struct xen_netif_rx_response *rx = &rinfo->rx, rx_local;
	/* One additional slot is allowed when the first response is small. */
	int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
	RING_IDX cons = queue->rx.rsp_cons;
	struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
	struct xen_netif_extra_info *extras = rinfo->extras;
	grant_ref_t ref = xennet_get_rx_ref(queue, cons);
	struct device *dev = &queue->info->netdev->dev;
	struct bpf_prog *xdp_prog;
	struct xdp_buff xdp;
	int slots = 1;
	int err = 0;
	u32 verdict;

	if (rx->flags & XEN_NETRXF_extra_info) {
		err = xennet_get_extras(queue, extras, rp);
		if (!err) {
			/* An XDP extra carries the headroom; use it as the data offset. */
			if (extras[XEN_NETIF_EXTRA_TYPE_XDP - 1].type) {
				struct xen_netif_extra_info *xdp;

				xdp = &extras[XEN_NETIF_EXTRA_TYPE_XDP - 1];
				rx->offset = xdp->u.xdp.headroom;
			}
		}
		/* xennet_get_extras() advanced rsp_cons; pick up the new value. */
		cons = queue->rx.rsp_cons;
	}

	for (;;) {
		/*
		 * This definitely indicates a bug, either in this driver or in
		 * the backend driver. In future this should flag the bad
		 * situation to the system controller to reboot the backend.
		 */
		if (ref == INVALID_GRANT_REF) {
			if (net_ratelimit())
				dev_warn(dev, "Bad rx response id %d.\n",
					 rx->id);
			err = -EINVAL;
			goto next;
		}

		/* Negative status or data beyond the page: re-post the buffer. */
		if (unlikely(rx->status < 0 ||
			     rx->offset + rx->status > XEN_PAGE_SIZE)) {
			if (net_ratelimit())
				dev_warn(dev, "rx->offset: %u, size: %d\n",
					 rx->offset, rx->status);
			xennet_move_rx_slot(queue, skb, ref);
			err = -EINVAL;
			goto next;
		}

		/* The grant could not be ended: disable the device. */
		if (!gnttab_end_foreign_access_ref(ref)) {
			dev_alert(dev,
				  "Grant still in use by backend domain\n");
			queue->info->broken = true;
			dev_alert(dev, "Disabled for further use\n");
			return -EINVAL;
		}

		gnttab_release_grant_reference(&queue->gref_rx_head, ref);

		rcu_read_lock();
		xdp_prog = rcu_dereference(queue->xdp_prog);
		if (xdp_prog) {
			if (!(rx->flags & XEN_NETRXF_more_data)) {
				/* currently only a single page contains data */
				verdict = xennet_run_xdp(queue,
							 skb_frag_page(&skb_shinfo(skb)->frags[0]),
							 rx, xdp_prog, &xdp, need_xdp_flush);
				/* Any verdict other than XDP_PASS is reported as an error. */
				if (verdict != XDP_PASS)
					err = -EINVAL;
			} else {
				/* drop the frame */
				err = -EINVAL;
			}
		}
		rcu_read_unlock();

		/* The skb is queued even when err is set. */
		__skb_queue_tail(list, skb);

next:
		if (!(rx->flags & XEN_NETRXF_more_data))
			break;

		/* More data was announced but no further response is available. */
		if (cons + slots == rp) {
			if (net_ratelimit())
				dev_warn(dev, "Need more slots\n");
			err = -ENOENT;
			break;
		}

		/* Continue with a local copy of the next response. */
		RING_COPY_RESPONSE(&queue->rx, cons + slots, &rx_local);
		rx = &rx_local;
		skb = xennet_get_rx_skb(queue, cons + slots);
		ref = xennet_get_rx_ref(queue, cons + slots);
		slots++;
	}

	if (unlikely(slots > max)) {
		if (net_ratelimit())
			dev_warn(dev, "Too many slots\n");
		err = -E2BIG;
	}

	/* On failure, skip the consumer index past everything examined. */
	if (unlikely(err))
		xennet_set_rx_rsp_cons(queue, cons + slots);

	return err;
}
  928. static int xennet_set_skb_gso(struct sk_buff *skb,
  929. struct xen_netif_extra_info *gso)
  930. {
  931. if (!gso->u.gso.size) {
  932. if (net_ratelimit())
  933. pr_warn("GSO size must not be zero\n");
  934. return -EINVAL;
  935. }
  936. if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
  937. gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
  938. if (net_ratelimit())
  939. pr_warn("Bad GSO type %d\n", gso->u.gso.type);
  940. return -EINVAL;
  941. }
  942. skb_shinfo(skb)->gso_size = gso->u.gso.size;
  943. skb_shinfo(skb)->gso_type =
  944. (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
  945. SKB_GSO_TCPV4 :
  946. SKB_GSO_TCPV6;
  947. /* Header must be checked, and gso_segs computed. */
  948. skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
  949. skb_shinfo(skb)->gso_segs = 0;
  950. return 0;
  951. }
/*
 * Attach the page of every skb queued on @list to @skb as an additional
 * fragment, consuming one RX response per fragment.
 *
 * Returns 0 after moving rsp_cons to the last response used, or -ENOENT
 * when @skb has no free fragment slot left; in that case rsp_cons is moved
 * past the remaining responses of the packet.
 */
static int xennet_fill_frags(struct netfront_queue *queue,
			     struct sk_buff *skb,
			     struct sk_buff_head *list)
{
	RING_IDX cons = queue->rx.rsp_cons;
	struct sk_buff *nskb;

	while ((nskb = __skb_dequeue(list))) {
		struct xen_netif_rx_response rx;
		skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];

		/* Local copy of the response describing this fragment. */
		RING_COPY_RESPONSE(&queue->rx, ++cons, &rx);

		/* Fragment array full: pull data into the linear area up to pull_to. */
		if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
			unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;

			BUG_ON(pull_to < skb_headlen(skb));
			__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
		}
		/* Still no free slot after the pull: give up on this packet. */
		if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
			xennet_set_rx_rsp_cons(queue,
					       ++cons + skb_queue_len(list));
			kfree_skb(nskb);
			return -ENOENT;
		}

		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
				skb_frag_page(nfrag),
				rx.offset, rx.status, PAGE_SIZE);

		/* The page now belongs to @skb; clear the count before freeing nskb. */
		skb_shinfo(nskb)->nr_frags = 0;
		kfree_skb(nskb);
	}

	xennet_set_rx_rsp_cons(queue, cons);

	return 0;
}
  982. static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
  983. {
  984. bool recalculate_partial_csum = false;
  985. /*
  986. * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
  987. * peers can fail to set NETRXF_csum_blank when sending a GSO
  988. * frame. In this case force the SKB to CHECKSUM_PARTIAL and
  989. * recalculate the partial checksum.
  990. */
  991. if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
  992. struct netfront_info *np = netdev_priv(dev);
  993. atomic_inc(&np->rx_gso_checksum_fixup);
  994. skb->ip_summed = CHECKSUM_PARTIAL;
  995. recalculate_partial_csum = true;
  996. }
  997. /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
  998. if (skb->ip_summed != CHECKSUM_PARTIAL)
  999. return 0;
  1000. return skb_checksum_setup(skb, recalculate_partial_csum);
  1001. }
  1002. static int handle_incoming_queue(struct netfront_queue *queue,
  1003. struct sk_buff_head *rxq)
  1004. {
  1005. struct netfront_stats *rx_stats = this_cpu_ptr(queue->info->rx_stats);
  1006. int packets_dropped = 0;
  1007. struct sk_buff *skb;
  1008. while ((skb = __skb_dequeue(rxq)) != NULL) {
  1009. int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
  1010. if (pull_to > skb_headlen(skb))
  1011. __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
  1012. /* Ethernet work: Delayed to here as it peeks the header. */
  1013. skb->protocol = eth_type_trans(skb, queue->info->netdev);
  1014. skb_reset_network_header(skb);
  1015. if (checksum_setup(queue->info->netdev, skb)) {
  1016. kfree_skb(skb);
  1017. packets_dropped++;
  1018. queue->info->netdev->stats.rx_errors++;
  1019. continue;
  1020. }
  1021. u64_stats_update_begin(&rx_stats->syncp);
  1022. u64_stats_inc(&rx_stats->packets);
  1023. u64_stats_add(&rx_stats->bytes, skb->len);
  1024. u64_stats_update_end(&rx_stats->syncp);
  1025. /* Pass it up. */
  1026. napi_gro_receive(&queue->napi, skb);
  1027. }
  1028. return packets_dropped;
  1029. }
/*
 * NAPI poll handler for one RX queue.
 *
 * Processes responses between rsp_cons and the producer index sampled at
 * entry, up to @budget packets, building skbs and passing them up via
 * handle_incoming_queue().  Malformed packets are collected on a private
 * error queue and purged.  Runs with queue->rx_lock held throughout.
 *
 * Returns the number of packets delivered (dropped ones are subtracted),
 * or 0 if the device is or becomes broken.
 */
static int xennet_poll(struct napi_struct *napi, int budget)
{
	struct netfront_queue *queue = container_of(napi, struct netfront_queue, napi);
	struct net_device *dev = queue->info->netdev;
	struct sk_buff *skb;
	struct netfront_rx_info rinfo;
	struct xen_netif_rx_response *rx = &rinfo.rx;
	struct xen_netif_extra_info *extras = rinfo.extras;
	RING_IDX i, rp;
	int work_done;
	struct sk_buff_head rxq;
	struct sk_buff_head errq;
	struct sk_buff_head tmpq;
	int err;
	bool need_xdp_flush = false;

	spin_lock(&queue->rx_lock);

	/* rxq: packets to deliver; errq: skbs to discard; tmpq: current packet. */
	skb_queue_head_init(&rxq);
	skb_queue_head_init(&errq);
	skb_queue_head_init(&tmpq);

	/* Reject a producer index the overflow check flags as illegal. */
	rp = queue->rx.sring->rsp_prod;
	if (RING_RESPONSE_PROD_OVERFLOW(&queue->rx, rp)) {
		dev_alert(&dev->dev, "Illegal number of responses %u\n",
			  rp - queue->rx.rsp_cons);
		queue->info->broken = true;
		spin_unlock(&queue->rx_lock);
		return 0;
	}
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	i = queue->rx.rsp_cons;
	work_done = 0;
	while ((i != rp) && (work_done < budget)) {
		/* Work on a private copy of the first response of the packet. */
		RING_COPY_RESPONSE(&queue->rx, i, rx);
		memset(extras, 0, sizeof(rinfo.extras));

		err = xennet_get_responses(queue, &rinfo, rp, &tmpq,
					   &need_xdp_flush);

		if (unlikely(err)) {
			if (queue->info->broken) {
				spin_unlock(&queue->rx_lock);
				return 0;
			}
			/*
			 * Also entered via "goto err" from the GSO and
			 * fragment failures below: discard the skbs of the
			 * current packet and resume at the updated rsp_cons.
			 */
err:
			while ((skb = __skb_dequeue(&tmpq)))
				__skb_queue_tail(&errq, skb);
			dev->stats.rx_errors++;
			i = queue->rx.rsp_cons;
			continue;
		}

		/* The first skb of the packet becomes the head skb. */
		skb = __skb_dequeue(&tmpq);

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (unlikely(xennet_set_skb_gso(skb, gso))) {
				/* Put the head back so the error path frees it too. */
				__skb_queue_head(&tmpq, skb);
				xennet_set_rx_rsp_cons(queue,
						       queue->rx.rsp_cons +
						       skb_queue_len(&tmpq));
				goto err;
			}
		}

		/* Header bytes to pull later, capped at RX_COPY_THRESHOLD. */
		NETFRONT_SKB_CB(skb)->pull_to = rx->status;
		if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
			NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;

		/* Describe the received data in the first fragment. */
		skb_frag_off_set(&skb_shinfo(skb)->frags[0], rx->offset);
		skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
		skb->data_len = rx->status;
		skb->len += rx->status;

		if (unlikely(xennet_fill_frags(queue, skb, &tmpq)))
			goto err;

		if (rx->flags & XEN_NETRXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (rx->flags & XEN_NETRXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		__skb_queue_tail(&rxq, skb);

		/* Step past the last response consumed for this packet. */
		i = queue->rx.rsp_cons + 1;
		xennet_set_rx_rsp_cons(queue, i);
		work_done++;
	}
	if (need_xdp_flush)
		xdp_do_flush();

	__skb_queue_purge(&errq);

	/* Packets dropped during delivery do not count as work done. */
	work_done -= handle_incoming_queue(queue, &rxq);

	xennet_alloc_rx_buffers(queue);

	if (work_done < budget) {
		int more_to_do = 0;

		napi_complete_done(napi, work_done);

		/* Re-check the ring so late responses are not missed. */
		RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
		if (more_to_do)
			napi_schedule(napi);
	}

	spin_unlock(&queue->rx_lock);

	return work_done;
}
  1122. static int xennet_change_mtu(struct net_device *dev, int mtu)
  1123. {
  1124. int max = xennet_can_sg(dev) ? XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN;
  1125. if (mtu > max)
  1126. return -EINVAL;
  1127. WRITE_ONCE(dev->mtu, mtu);
  1128. return 0;
  1129. }
  1130. static void xennet_get_stats64(struct net_device *dev,
  1131. struct rtnl_link_stats64 *tot)
  1132. {
  1133. struct netfront_info *np = netdev_priv(dev);
  1134. int cpu;
  1135. for_each_possible_cpu(cpu) {
  1136. struct netfront_stats *rx_stats = per_cpu_ptr(np->rx_stats, cpu);
  1137. struct netfront_stats *tx_stats = per_cpu_ptr(np->tx_stats, cpu);
  1138. u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
  1139. unsigned int start;
  1140. do {
  1141. start = u64_stats_fetch_begin(&tx_stats->syncp);
  1142. tx_packets = u64_stats_read(&tx_stats->packets);
  1143. tx_bytes = u64_stats_read(&tx_stats->bytes);
  1144. } while (u64_stats_fetch_retry(&tx_stats->syncp, start));
  1145. do {
  1146. start = u64_stats_fetch_begin(&rx_stats->syncp);
  1147. rx_packets = u64_stats_read(&rx_stats->packets);
  1148. rx_bytes = u64_stats_read(&rx_stats->bytes);
  1149. } while (u64_stats_fetch_retry(&rx_stats->syncp, start));
  1150. tot->rx_packets += rx_packets;
  1151. tot->tx_packets += tx_packets;
  1152. tot->rx_bytes += rx_bytes;
  1153. tot->tx_bytes += tx_bytes;
  1154. }
  1155. tot->rx_errors = dev->stats.rx_errors;
  1156. tot->tx_dropped = dev->stats.tx_dropped;
  1157. }
  1158. static void xennet_release_tx_bufs(struct netfront_queue *queue)
  1159. {
  1160. struct sk_buff *skb;
  1161. int i;
  1162. for (i = 0; i < NET_TX_RING_SIZE; i++) {
  1163. /* Skip over entries which are actually freelist references */
  1164. if (!queue->tx_skbs[i])
  1165. continue;
  1166. skb = queue->tx_skbs[i];
  1167. queue->tx_skbs[i] = NULL;
  1168. get_page(queue->grant_tx_page[i]);
  1169. gnttab_end_foreign_access(queue->grant_tx_ref[i],
  1170. queue->grant_tx_page[i]);
  1171. queue->grant_tx_page[i] = NULL;
  1172. queue->grant_tx_ref[i] = INVALID_GRANT_REF;
  1173. add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, i);
  1174. dev_kfree_skb_irq(skb);
  1175. }
  1176. }
  1177. static void xennet_release_rx_bufs(struct netfront_queue *queue)
  1178. {
  1179. int id, ref;
  1180. spin_lock_bh(&queue->rx_lock);
  1181. for (id = 0; id < NET_RX_RING_SIZE; id++) {
  1182. struct sk_buff *skb;
  1183. struct page *page;
  1184. skb = queue->rx_skbs[id];
  1185. if (!skb)
  1186. continue;
  1187. ref = queue->grant_rx_ref[id];
  1188. if (ref == INVALID_GRANT_REF)
  1189. continue;
  1190. page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
  1191. /* gnttab_end_foreign_access() needs a page ref until
  1192. * foreign access is ended (which may be deferred).
  1193. */
  1194. get_page(page);
  1195. gnttab_end_foreign_access(ref, page);
  1196. queue->grant_rx_ref[id] = INVALID_GRANT_REF;
  1197. kfree_skb(skb);
  1198. }
  1199. spin_unlock_bh(&queue->rx_lock);
  1200. }
  1201. static netdev_features_t xennet_fix_features(struct net_device *dev,
  1202. netdev_features_t features)
  1203. {
  1204. struct netfront_info *np = netdev_priv(dev);
  1205. if (features & NETIF_F_SG &&
  1206. !xenbus_read_unsigned(np->xbdev->otherend, "feature-sg", 0))
  1207. features &= ~NETIF_F_SG;
  1208. if (features & NETIF_F_IPV6_CSUM &&
  1209. !xenbus_read_unsigned(np->xbdev->otherend,
  1210. "feature-ipv6-csum-offload", 0))
  1211. features &= ~NETIF_F_IPV6_CSUM;
  1212. if (features & NETIF_F_TSO &&
  1213. !xenbus_read_unsigned(np->xbdev->otherend, "feature-gso-tcpv4", 0))
  1214. features &= ~NETIF_F_TSO;
  1215. if (features & NETIF_F_TSO6 &&
  1216. !xenbus_read_unsigned(np->xbdev->otherend, "feature-gso-tcpv6", 0))
  1217. features &= ~NETIF_F_TSO6;
  1218. return features;
  1219. }
  1220. static int xennet_set_features(struct net_device *dev,
  1221. netdev_features_t features)
  1222. {
  1223. if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
  1224. netdev_info(dev, "Reducing MTU because no SG offload");
  1225. dev->mtu = ETH_DATA_LEN;
  1226. }
  1227. return 0;
  1228. }
  1229. static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi)
  1230. {
  1231. unsigned long flags;
  1232. if (unlikely(queue->info->broken))
  1233. return false;
  1234. spin_lock_irqsave(&queue->tx_lock, flags);
  1235. if (xennet_tx_buf_gc(queue))
  1236. *eoi = 0;
  1237. spin_unlock_irqrestore(&queue->tx_lock, flags);
  1238. return true;
  1239. }
  1240. static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
  1241. {
  1242. unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
  1243. if (likely(xennet_handle_tx(dev_id, &eoiflag)))
  1244. xen_irq_lateeoi(irq, eoiflag);
  1245. return IRQ_HANDLED;
  1246. }
  1247. static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi)
  1248. {
  1249. unsigned int work_queued;
  1250. unsigned long flags;
  1251. if (unlikely(queue->info->broken))
  1252. return false;
  1253. spin_lock_irqsave(&queue->rx_cons_lock, flags);
  1254. work_queued = XEN_RING_NR_UNCONSUMED_RESPONSES(&queue->rx);
  1255. if (work_queued > queue->rx_rsp_unconsumed) {
  1256. queue->rx_rsp_unconsumed = work_queued;
  1257. *eoi = 0;
  1258. } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) {
  1259. const struct device *dev = &queue->info->netdev->dev;
  1260. spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
  1261. dev_alert(dev, "RX producer index going backwards\n");
  1262. dev_alert(dev, "Disabled for further use\n");
  1263. queue->info->broken = true;
  1264. return false;
  1265. }
  1266. spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
  1267. if (likely(netif_carrier_ok(queue->info->netdev) && work_queued))
  1268. napi_schedule(&queue->napi);
  1269. return true;
  1270. }
  1271. static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
  1272. {
  1273. unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
  1274. if (likely(xennet_handle_rx(dev_id, &eoiflag)))
  1275. xen_irq_lateeoi(irq, eoiflag);
  1276. return IRQ_HANDLED;
  1277. }
  1278. static irqreturn_t xennet_interrupt(int irq, void *dev_id)
  1279. {
  1280. unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
  1281. if (xennet_handle_tx(dev_id, &eoiflag) &&
  1282. xennet_handle_rx(dev_id, &eoiflag))
  1283. xen_irq_lateeoi(irq, eoiflag);
  1284. return IRQ_HANDLED;
  1285. }
  1286. #ifdef CONFIG_NET_POLL_CONTROLLER
  1287. static void xennet_poll_controller(struct net_device *dev)
  1288. {
  1289. /* Poll each queue */
  1290. struct netfront_info *info = netdev_priv(dev);
  1291. unsigned int num_queues = dev->real_num_tx_queues;
  1292. unsigned int i;
  1293. if (info->broken)
  1294. return;
  1295. for (i = 0; i < num_queues; ++i)
  1296. xennet_interrupt(0, &info->queues[i]);
  1297. }
  1298. #endif
  1299. #define NETBACK_XDP_HEADROOM_DISABLE 0
  1300. #define NETBACK_XDP_HEADROOM_ENABLE 1
  1301. static int talk_to_netback_xdp(struct netfront_info *np, int xdp)
  1302. {
  1303. int err;
  1304. unsigned short headroom;
  1305. headroom = xdp ? XDP_PACKET_HEADROOM : 0;
  1306. err = xenbus_printf(XBT_NIL, np->xbdev->nodename,
  1307. "xdp-headroom", "%hu",
  1308. headroom);
  1309. if (err)
  1310. pr_warn("Error writing xdp-headroom\n");
  1311. return err;
  1312. }
  1313. static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
  1314. struct netlink_ext_ack *extack)
  1315. {
  1316. unsigned long max_mtu = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM;
  1317. struct netfront_info *np = netdev_priv(dev);
  1318. struct bpf_prog *old_prog;
  1319. unsigned int i, err;
  1320. if (dev->mtu > max_mtu) {
  1321. netdev_warn(dev, "XDP requires MTU less than %lu\n", max_mtu);
  1322. return -EINVAL;
  1323. }
  1324. if (!np->netback_has_xdp_headroom)
  1325. return 0;
  1326. xenbus_switch_state(np->xbdev, XenbusStateReconfiguring);
  1327. err = talk_to_netback_xdp(np, prog ? NETBACK_XDP_HEADROOM_ENABLE :
  1328. NETBACK_XDP_HEADROOM_DISABLE);
  1329. if (err)
  1330. return err;
  1331. /* avoid the race with XDP headroom adjustment */
  1332. wait_event(module_wq,
  1333. xenbus_read_driver_state(np->xbdev, np->xbdev->otherend) ==
  1334. XenbusStateReconfigured);
  1335. np->netfront_xdp_enabled = true;
  1336. old_prog = rtnl_dereference(np->queues[0].xdp_prog);
  1337. if (prog)
  1338. bpf_prog_add(prog, dev->real_num_tx_queues);
  1339. for (i = 0; i < dev->real_num_tx_queues; ++i)
  1340. rcu_assign_pointer(np->queues[i].xdp_prog, prog);
  1341. if (old_prog)
  1342. for (i = 0; i < dev->real_num_tx_queues; ++i)
  1343. bpf_prog_put(old_prog);
  1344. xenbus_switch_state(np->xbdev, XenbusStateConnected);
  1345. return 0;
  1346. }
  1347. static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  1348. {
  1349. struct netfront_info *np = netdev_priv(dev);
  1350. if (np->broken)
  1351. return -ENODEV;
  1352. switch (xdp->command) {
  1353. case XDP_SETUP_PROG:
  1354. return xennet_xdp_set(dev, xdp->prog, xdp->extack);
  1355. default:
  1356. return -EINVAL;
  1357. }
  1358. }
  1359. static const struct net_device_ops xennet_netdev_ops = {
  1360. .ndo_uninit = xennet_uninit,
  1361. .ndo_open = xennet_open,
  1362. .ndo_stop = xennet_close,
  1363. .ndo_start_xmit = xennet_start_xmit,
  1364. .ndo_change_mtu = xennet_change_mtu,
  1365. .ndo_get_stats64 = xennet_get_stats64,
  1366. .ndo_set_mac_address = eth_mac_addr,
  1367. .ndo_validate_addr = eth_validate_addr,
  1368. .ndo_fix_features = xennet_fix_features,
  1369. .ndo_set_features = xennet_set_features,
  1370. .ndo_select_queue = xennet_select_queue,
  1371. .ndo_bpf = xennet_xdp,
  1372. .ndo_xdp_xmit = xennet_xdp_xmit,
  1373. #ifdef CONFIG_NET_POLL_CONTROLLER
  1374. .ndo_poll_controller = xennet_poll_controller,
  1375. #endif
  1376. };
  1377. static void xennet_free_netdev(struct net_device *netdev)
  1378. {
  1379. struct netfront_info *np = netdev_priv(netdev);
  1380. free_percpu(np->rx_stats);
  1381. free_percpu(np->tx_stats);
  1382. free_netdev(netdev);
  1383. }
  1384. static struct net_device *xennet_create_dev(struct xenbus_device *dev)
  1385. {
  1386. int err;
  1387. struct net_device *netdev;
  1388. struct netfront_info *np;
  1389. netdev = alloc_etherdev_mq(sizeof(struct netfront_info), xennet_max_queues);
  1390. if (!netdev)
  1391. return ERR_PTR(-ENOMEM);
  1392. np = netdev_priv(netdev);
  1393. np->xbdev = dev;
  1394. np->queues = NULL;
  1395. err = -ENOMEM;
  1396. np->rx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
  1397. if (np->rx_stats == NULL)
  1398. goto exit;
  1399. np->tx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
  1400. if (np->tx_stats == NULL)
  1401. goto exit;
  1402. netdev->netdev_ops = &xennet_netdev_ops;
  1403. netdev->features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
  1404. NETIF_F_GSO_ROBUST;
  1405. netdev->hw_features = NETIF_F_SG |
  1406. NETIF_F_IPV6_CSUM |
  1407. NETIF_F_TSO | NETIF_F_TSO6;
  1408. /*
  1409. * Assume that all hw features are available for now. This set
  1410. * will be adjusted by the call to netdev_update_features() in
  1411. * xennet_connect() which is the earliest point where we can
  1412. * negotiate with the backend regarding supported features.
  1413. */
  1414. netdev->features |= netdev->hw_features;
  1415. netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
  1416. NETDEV_XDP_ACT_NDO_XMIT;
  1417. netdev->ethtool_ops = &xennet_ethtool_ops;
  1418. netdev->min_mtu = ETH_MIN_MTU;
  1419. netdev->max_mtu = XEN_NETIF_MAX_TX_SIZE;
  1420. SET_NETDEV_DEV(netdev, &dev->dev);
  1421. np->netdev = netdev;
  1422. np->netfront_xdp_enabled = false;
  1423. netif_carrier_off(netdev);
  1424. do {
  1425. xenbus_switch_state(dev, XenbusStateInitialising);
  1426. err = wait_event_timeout(module_wq,
  1427. xenbus_read_driver_state(dev, dev->otherend) !=
  1428. XenbusStateClosed &&
  1429. xenbus_read_driver_state(dev, dev->otherend) !=
  1430. XenbusStateUnknown, XENNET_TIMEOUT);
  1431. } while (!err);
  1432. return netdev;
  1433. exit:
  1434. xennet_free_netdev(netdev);
  1435. return ERR_PTR(err);
  1436. }
  1437. /*
  1438. * Entry point to this code when a new device is created. Allocate the basic
  1439. * structures and the ring buffers for communication with the backend, and
  1440. * inform the backend of the appropriate details for those.
  1441. */
  1442. static int netfront_probe(struct xenbus_device *dev,
  1443. const struct xenbus_device_id *id)
  1444. {
  1445. int err;
  1446. struct net_device *netdev;
  1447. struct netfront_info *info;
  1448. netdev = xennet_create_dev(dev);
  1449. if (IS_ERR(netdev)) {
  1450. err = PTR_ERR(netdev);
  1451. xenbus_dev_fatal(dev, err, "creating netdev");
  1452. return err;
  1453. }
  1454. info = netdev_priv(netdev);
  1455. dev_set_drvdata(&dev->dev, info);
  1456. #ifdef CONFIG_SYSFS
  1457. info->netdev->sysfs_groups[0] = &xennet_dev_group;
  1458. #endif
  1459. return 0;
  1460. }
  1461. static void xennet_end_access(int ref, void *page)
  1462. {
  1463. /* This frees the page as a side-effect */
  1464. if (ref != INVALID_GRANT_REF)
  1465. gnttab_end_foreign_access(ref, virt_to_page(page));
  1466. }
  1467. static void xennet_disconnect_backend(struct netfront_info *info)
  1468. {
  1469. unsigned int i = 0;
  1470. unsigned int num_queues = info->netdev->real_num_tx_queues;
  1471. netif_carrier_off(info->netdev);
  1472. for (i = 0; i < num_queues && info->queues; ++i) {
  1473. struct netfront_queue *queue = &info->queues[i];
  1474. timer_delete_sync(&queue->rx_refill_timer);
  1475. if (queue->tx_irq && (queue->tx_irq == queue->rx_irq))
  1476. unbind_from_irqhandler(queue->tx_irq, queue);
  1477. if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) {
  1478. unbind_from_irqhandler(queue->tx_irq, queue);
  1479. unbind_from_irqhandler(queue->rx_irq, queue);
  1480. }
  1481. queue->tx_evtchn = queue->rx_evtchn = 0;
  1482. queue->tx_irq = queue->rx_irq = 0;
  1483. if (netif_running(info->netdev))
  1484. napi_synchronize(&queue->napi);
  1485. xennet_release_tx_bufs(queue);
  1486. xennet_release_rx_bufs(queue);
  1487. gnttab_free_grant_references(queue->gref_tx_head);
  1488. gnttab_free_grant_references(queue->gref_rx_head);
  1489. /* End access and free the pages */
  1490. xennet_end_access(queue->tx_ring_ref, queue->tx.sring);
  1491. xennet_end_access(queue->rx_ring_ref, queue->rx.sring);
  1492. queue->tx_ring_ref = INVALID_GRANT_REF;
  1493. queue->rx_ring_ref = INVALID_GRANT_REF;
  1494. queue->tx.sring = NULL;
  1495. queue->rx.sring = NULL;
  1496. page_pool_destroy(queue->page_pool);
  1497. }
  1498. }
  1499. /*
  1500. * We are reconnecting to the backend, due to a suspend/resume, or a backend
  1501. * driver restart. We tear down our netif structure and recreate it, but
  1502. * leave the device-layer structures intact so that this is transparent to the
  1503. * rest of the kernel.
  1504. */
  1505. static int netfront_resume(struct xenbus_device *dev)
  1506. {
  1507. struct netfront_info *info = dev_get_drvdata(&dev->dev);
  1508. dev_dbg(&dev->dev, "%s\n", dev->nodename);
  1509. netif_tx_lock_bh(info->netdev);
  1510. netif_device_detach(info->netdev);
  1511. netif_tx_unlock_bh(info->netdev);
  1512. xennet_disconnect_backend(info);
  1513. rtnl_lock();
  1514. if (info->queues)
  1515. xennet_destroy_queues(info);
  1516. rtnl_unlock();
  1517. return 0;
  1518. }
  1519. static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
  1520. {
  1521. char *s, *e, *macstr;
  1522. int i;
  1523. macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
  1524. if (IS_ERR(macstr))
  1525. return PTR_ERR(macstr);
  1526. for (i = 0; i < ETH_ALEN; i++) {
  1527. mac[i] = simple_strtoul(s, &e, 16);
  1528. if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
  1529. kfree(macstr);
  1530. return -ENOENT;
  1531. }
  1532. s = e+1;
  1533. }
  1534. kfree(macstr);
  1535. return 0;
  1536. }
  1537. static int setup_netfront_single(struct netfront_queue *queue)
  1538. {
  1539. int err;
  1540. err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
  1541. if (err < 0)
  1542. goto fail;
  1543. err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
  1544. xennet_interrupt, 0,
  1545. queue->info->netdev->name,
  1546. queue);
  1547. if (err < 0)
  1548. goto bind_fail;
  1549. queue->rx_evtchn = queue->tx_evtchn;
  1550. queue->rx_irq = queue->tx_irq = err;
  1551. return 0;
  1552. bind_fail:
  1553. xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
  1554. queue->tx_evtchn = 0;
  1555. fail:
  1556. return err;
  1557. }
  1558. static int setup_netfront_split(struct netfront_queue *queue)
  1559. {
  1560. int err;
  1561. err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
  1562. if (err < 0)
  1563. goto fail;
  1564. err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->rx_evtchn);
  1565. if (err < 0)
  1566. goto alloc_rx_evtchn_fail;
  1567. snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
  1568. "%s-tx", queue->name);
  1569. err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
  1570. xennet_tx_interrupt, 0,
  1571. queue->tx_irq_name, queue);
  1572. if (err < 0)
  1573. goto bind_tx_fail;
  1574. queue->tx_irq = err;
  1575. snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
  1576. "%s-rx", queue->name);
  1577. err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn,
  1578. xennet_rx_interrupt, 0,
  1579. queue->rx_irq_name, queue);
  1580. if (err < 0)
  1581. goto bind_rx_fail;
  1582. queue->rx_irq = err;
  1583. return 0;
  1584. bind_rx_fail:
  1585. unbind_from_irqhandler(queue->tx_irq, queue);
  1586. queue->tx_irq = 0;
  1587. bind_tx_fail:
  1588. xenbus_free_evtchn(queue->info->xbdev, queue->rx_evtchn);
  1589. queue->rx_evtchn = 0;
  1590. alloc_rx_evtchn_fail:
  1591. xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
  1592. queue->tx_evtchn = 0;
  1593. fail:
  1594. return err;
  1595. }
  1596. static int setup_netfront(struct xenbus_device *dev,
  1597. struct netfront_queue *queue, unsigned int feature_split_evtchn)
  1598. {
  1599. struct xen_netif_tx_sring *txs;
  1600. struct xen_netif_rx_sring *rxs;
  1601. int err;
  1602. queue->tx_ring_ref = INVALID_GRANT_REF;
  1603. queue->rx_ring_ref = INVALID_GRANT_REF;
  1604. queue->rx.sring = NULL;
  1605. queue->tx.sring = NULL;
  1606. err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH, (void **)&txs,
  1607. 1, &queue->tx_ring_ref);
  1608. if (err)
  1609. goto fail;
  1610. XEN_FRONT_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
  1611. err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH, (void **)&rxs,
  1612. 1, &queue->rx_ring_ref);
  1613. if (err)
  1614. goto fail;
  1615. XEN_FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
  1616. if (feature_split_evtchn)
  1617. err = setup_netfront_split(queue);
  1618. /* setup single event channel if
  1619. * a) feature-split-event-channels == 0
  1620. * b) feature-split-event-channels == 1 but failed to setup
  1621. */
  1622. if (!feature_split_evtchn || err)
  1623. err = setup_netfront_single(queue);
  1624. if (err)
  1625. goto fail;
  1626. return 0;
  1627. fail:
  1628. xenbus_teardown_ring((void **)&queue->rx.sring, 1, &queue->rx_ring_ref);
  1629. xenbus_teardown_ring((void **)&queue->tx.sring, 1, &queue->tx_ring_ref);
  1630. return err;
  1631. }
  1632. /* Queue-specific initialisation
  1633. * This used to be done in xennet_create_dev() but must now
  1634. * be run per-queue.
  1635. */
  1636. static int xennet_init_queue(struct netfront_queue *queue)
  1637. {
  1638. unsigned short i;
  1639. int err = 0;
  1640. char *devid;
  1641. spin_lock_init(&queue->tx_lock);
  1642. spin_lock_init(&queue->rx_lock);
  1643. spin_lock_init(&queue->rx_cons_lock);
  1644. timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0);
  1645. devid = strrchr(queue->info->xbdev->nodename, '/') + 1;
  1646. snprintf(queue->name, sizeof(queue->name), "vif%s-q%u",
  1647. devid, queue->id);
  1648. /* Initialise tx_skb_freelist as a free chain containing every entry. */
  1649. queue->tx_skb_freelist = 0;
  1650. queue->tx_pend_queue = TX_LINK_NONE;
  1651. for (i = 0; i < NET_TX_RING_SIZE; i++) {
  1652. queue->tx_link[i] = i + 1;
  1653. queue->grant_tx_ref[i] = INVALID_GRANT_REF;
  1654. queue->grant_tx_page[i] = NULL;
  1655. }
  1656. queue->tx_link[NET_TX_RING_SIZE - 1] = TX_LINK_NONE;
  1657. /* Clear out rx_skbs */
  1658. for (i = 0; i < NET_RX_RING_SIZE; i++) {
  1659. queue->rx_skbs[i] = NULL;
  1660. queue->grant_rx_ref[i] = INVALID_GRANT_REF;
  1661. }
  1662. /* A grant for every tx ring slot */
  1663. if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
  1664. &queue->gref_tx_head) < 0) {
  1665. pr_alert("can't alloc tx grant refs\n");
  1666. err = -ENOMEM;
  1667. goto exit;
  1668. }
  1669. /* A grant for every rx ring slot */
  1670. if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
  1671. &queue->gref_rx_head) < 0) {
  1672. pr_alert("can't alloc rx grant refs\n");
  1673. err = -ENOMEM;
  1674. goto exit_free_tx;
  1675. }
  1676. return 0;
  1677. exit_free_tx:
  1678. gnttab_free_grant_references(queue->gref_tx_head);
  1679. exit:
  1680. return err;
  1681. }
  1682. static int write_queue_xenstore_keys(struct netfront_queue *queue,
  1683. struct xenbus_transaction *xbt, int write_hierarchical)
  1684. {
  1685. /* Write the queue-specific keys into XenStore in the traditional
  1686. * way for a single queue, or in a queue subkeys for multiple
  1687. * queues.
  1688. */
  1689. struct xenbus_device *dev = queue->info->xbdev;
  1690. int err;
  1691. const char *message;
  1692. char *path;
  1693. size_t pathsize;
  1694. /* Choose the correct place to write the keys */
  1695. if (write_hierarchical) {
  1696. pathsize = strlen(dev->nodename) + 10;
  1697. path = kzalloc(pathsize, GFP_KERNEL);
  1698. if (!path) {
  1699. err = -ENOMEM;
  1700. message = "out of memory while writing ring references";
  1701. goto error;
  1702. }
  1703. snprintf(path, pathsize, "%s/queue-%u",
  1704. dev->nodename, queue->id);
  1705. } else {
  1706. path = (char *)dev->nodename;
  1707. }
  1708. /* Write ring references */
  1709. err = xenbus_printf(*xbt, path, "tx-ring-ref", "%u",
  1710. queue->tx_ring_ref);
  1711. if (err) {
  1712. message = "writing tx-ring-ref";
  1713. goto error;
  1714. }
  1715. err = xenbus_printf(*xbt, path, "rx-ring-ref", "%u",
  1716. queue->rx_ring_ref);
  1717. if (err) {
  1718. message = "writing rx-ring-ref";
  1719. goto error;
  1720. }
  1721. /* Write event channels; taking into account both shared
  1722. * and split event channel scenarios.
  1723. */
  1724. if (queue->tx_evtchn == queue->rx_evtchn) {
  1725. /* Shared event channel */
  1726. err = xenbus_printf(*xbt, path,
  1727. "event-channel", "%u", queue->tx_evtchn);
  1728. if (err) {
  1729. message = "writing event-channel";
  1730. goto error;
  1731. }
  1732. } else {
  1733. /* Split event channels */
  1734. err = xenbus_printf(*xbt, path,
  1735. "event-channel-tx", "%u", queue->tx_evtchn);
  1736. if (err) {
  1737. message = "writing event-channel-tx";
  1738. goto error;
  1739. }
  1740. err = xenbus_printf(*xbt, path,
  1741. "event-channel-rx", "%u", queue->rx_evtchn);
  1742. if (err) {
  1743. message = "writing event-channel-rx";
  1744. goto error;
  1745. }
  1746. }
  1747. if (write_hierarchical)
  1748. kfree(path);
  1749. return 0;
  1750. error:
  1751. if (write_hierarchical)
  1752. kfree(path);
  1753. xenbus_dev_fatal(dev, err, "%s", message);
  1754. return err;
  1755. }
  1756. static int xennet_create_page_pool(struct netfront_queue *queue)
  1757. {
  1758. int err;
  1759. struct page_pool_params pp_params = {
  1760. .order = 0,
  1761. .flags = 0,
  1762. .pool_size = NET_RX_RING_SIZE,
  1763. .nid = NUMA_NO_NODE,
  1764. .dev = &queue->info->netdev->dev,
  1765. .offset = XDP_PACKET_HEADROOM,
  1766. .max_len = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM,
  1767. };
  1768. queue->page_pool = page_pool_create(&pp_params);
  1769. if (IS_ERR(queue->page_pool)) {
  1770. err = PTR_ERR(queue->page_pool);
  1771. queue->page_pool = NULL;
  1772. return err;
  1773. }
  1774. err = xdp_rxq_info_reg(&queue->xdp_rxq, queue->info->netdev,
  1775. queue->id, 0);
  1776. if (err) {
  1777. netdev_err(queue->info->netdev, "xdp_rxq_info_reg failed\n");
  1778. goto err_free_pp;
  1779. }
  1780. err = xdp_rxq_info_reg_mem_model(&queue->xdp_rxq,
  1781. MEM_TYPE_PAGE_POOL, queue->page_pool);
  1782. if (err) {
  1783. netdev_err(queue->info->netdev, "xdp_rxq_info_reg_mem_model failed\n");
  1784. goto err_unregister_rxq;
  1785. }
  1786. return 0;
  1787. err_unregister_rxq:
  1788. xdp_rxq_info_unreg(&queue->xdp_rxq);
  1789. err_free_pp:
  1790. page_pool_destroy(queue->page_pool);
  1791. queue->page_pool = NULL;
  1792. return err;
  1793. }
  1794. static int xennet_create_queues(struct netfront_info *info,
  1795. unsigned int *num_queues)
  1796. {
  1797. unsigned int i;
  1798. int ret;
  1799. info->queues = kzalloc_objs(struct netfront_queue, *num_queues);
  1800. if (!info->queues)
  1801. return -ENOMEM;
  1802. for (i = 0; i < *num_queues; i++) {
  1803. struct netfront_queue *queue = &info->queues[i];
  1804. queue->id = i;
  1805. queue->info = info;
  1806. ret = xennet_init_queue(queue);
  1807. if (ret < 0) {
  1808. dev_warn(&info->xbdev->dev,
  1809. "only created %d queues\n", i);
  1810. *num_queues = i;
  1811. break;
  1812. }
  1813. /* use page pool recycling instead of buddy allocator */
  1814. ret = xennet_create_page_pool(queue);
  1815. if (ret < 0) {
  1816. dev_err(&info->xbdev->dev, "can't allocate page pool\n");
  1817. *num_queues = i;
  1818. return ret;
  1819. }
  1820. netif_napi_add(queue->info->netdev, &queue->napi, xennet_poll);
  1821. if (netif_running(info->netdev))
  1822. napi_enable(&queue->napi);
  1823. }
  1824. netif_set_real_num_tx_queues(info->netdev, *num_queues);
  1825. if (*num_queues == 0) {
  1826. dev_err(&info->xbdev->dev, "no queues\n");
  1827. return -EINVAL;
  1828. }
  1829. return 0;
  1830. }
  1831. /* Common code used when first setting up, and when resuming. */
  1832. static int talk_to_netback(struct xenbus_device *dev,
  1833. struct netfront_info *info)
  1834. {
  1835. const char *message;
  1836. struct xenbus_transaction xbt;
  1837. int err;
  1838. unsigned int feature_split_evtchn;
  1839. unsigned int i = 0;
  1840. unsigned int max_queues = 0;
  1841. struct netfront_queue *queue = NULL;
  1842. unsigned int num_queues = 1;
  1843. u8 addr[ETH_ALEN];
  1844. info->netdev->irq = 0;
  1845. /* Check if backend is trusted. */
  1846. info->bounce = !xennet_trusted ||
  1847. !xenbus_read_unsigned(dev->nodename, "trusted", 1);
  1848. /* Check if backend supports multiple queues */
  1849. max_queues = xenbus_read_unsigned(info->xbdev->otherend,
  1850. "multi-queue-max-queues", 1);
  1851. num_queues = min(max_queues, xennet_max_queues);
  1852. /* Check feature-split-event-channels */
  1853. feature_split_evtchn = xenbus_read_unsigned(info->xbdev->otherend,
  1854. "feature-split-event-channels", 0);
  1855. /* Read mac addr. */
  1856. err = xen_net_read_mac(dev, addr);
  1857. if (err) {
  1858. xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
  1859. goto out_unlocked;
  1860. }
  1861. eth_hw_addr_set(info->netdev, addr);
  1862. info->netback_has_xdp_headroom = xenbus_read_unsigned(info->xbdev->otherend,
  1863. "feature-xdp-headroom", 0);
  1864. if (info->netback_has_xdp_headroom) {
  1865. /* set the current xen-netfront xdp state */
  1866. err = talk_to_netback_xdp(info, info->netfront_xdp_enabled ?
  1867. NETBACK_XDP_HEADROOM_ENABLE :
  1868. NETBACK_XDP_HEADROOM_DISABLE);
  1869. if (err)
  1870. goto out_unlocked;
  1871. }
  1872. rtnl_lock();
  1873. if (info->queues)
  1874. xennet_destroy_queues(info);
  1875. /* For the case of a reconnect reset the "broken" indicator. */
  1876. info->broken = false;
  1877. err = xennet_create_queues(info, &num_queues);
  1878. if (err < 0) {
  1879. xenbus_dev_fatal(dev, err, "creating queues");
  1880. kfree(info->queues);
  1881. info->queues = NULL;
  1882. goto out;
  1883. }
  1884. rtnl_unlock();
  1885. /* Create shared ring, alloc event channel -- for each queue */
  1886. for (i = 0; i < num_queues; ++i) {
  1887. queue = &info->queues[i];
  1888. err = setup_netfront(dev, queue, feature_split_evtchn);
  1889. if (err)
  1890. goto destroy_ring;
  1891. }
  1892. again:
  1893. err = xenbus_transaction_start(&xbt);
  1894. if (err) {
  1895. xenbus_dev_fatal(dev, err, "starting transaction");
  1896. goto destroy_ring;
  1897. }
  1898. if (xenbus_exists(XBT_NIL,
  1899. info->xbdev->otherend, "multi-queue-max-queues")) {
  1900. /* Write the number of queues */
  1901. err = xenbus_printf(xbt, dev->nodename,
  1902. "multi-queue-num-queues", "%u", num_queues);
  1903. if (err) {
  1904. message = "writing multi-queue-num-queues";
  1905. goto abort_transaction_no_dev_fatal;
  1906. }
  1907. }
  1908. if (num_queues == 1) {
  1909. err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0); /* flat */
  1910. if (err)
  1911. goto abort_transaction_no_dev_fatal;
  1912. } else {
  1913. /* Write the keys for each queue */
  1914. for (i = 0; i < num_queues; ++i) {
  1915. queue = &info->queues[i];
  1916. err = write_queue_xenstore_keys(queue, &xbt, 1); /* hierarchical */
  1917. if (err)
  1918. goto abort_transaction_no_dev_fatal;
  1919. }
  1920. }
  1921. /* The remaining keys are not queue-specific */
  1922. err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
  1923. 1);
  1924. if (err) {
  1925. message = "writing request-rx-copy";
  1926. goto abort_transaction;
  1927. }
  1928. err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
  1929. if (err) {
  1930. message = "writing feature-rx-notify";
  1931. goto abort_transaction;
  1932. }
  1933. err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
  1934. if (err) {
  1935. message = "writing feature-sg";
  1936. goto abort_transaction;
  1937. }
  1938. err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
  1939. if (err) {
  1940. message = "writing feature-gso-tcpv4";
  1941. goto abort_transaction;
  1942. }
  1943. err = xenbus_write(xbt, dev->nodename, "feature-gso-tcpv6", "1");
  1944. if (err) {
  1945. message = "writing feature-gso-tcpv6";
  1946. goto abort_transaction;
  1947. }
  1948. err = xenbus_write(xbt, dev->nodename, "feature-ipv6-csum-offload",
  1949. "1");
  1950. if (err) {
  1951. message = "writing feature-ipv6-csum-offload";
  1952. goto abort_transaction;
  1953. }
  1954. err = xenbus_transaction_end(xbt, 0);
  1955. if (err) {
  1956. if (err == -EAGAIN)
  1957. goto again;
  1958. xenbus_dev_fatal(dev, err, "completing transaction");
  1959. goto destroy_ring;
  1960. }
  1961. return 0;
  1962. abort_transaction:
  1963. xenbus_dev_fatal(dev, err, "%s", message);
  1964. abort_transaction_no_dev_fatal:
  1965. xenbus_transaction_end(xbt, 1);
  1966. destroy_ring:
  1967. xennet_disconnect_backend(info);
  1968. rtnl_lock();
  1969. xennet_destroy_queues(info);
  1970. out:
  1971. rtnl_unlock();
  1972. out_unlocked:
  1973. device_unregister(&dev->dev);
  1974. return err;
  1975. }
  1976. static int xennet_connect(struct net_device *dev)
  1977. {
  1978. struct netfront_info *np = netdev_priv(dev);
  1979. unsigned int num_queues = 0;
  1980. int err;
  1981. unsigned int j = 0;
  1982. struct netfront_queue *queue = NULL;
  1983. if (!xenbus_read_unsigned(np->xbdev->otherend, "feature-rx-copy", 0)) {
  1984. dev_info(&dev->dev,
  1985. "backend does not support copying receive path\n");
  1986. return -ENODEV;
  1987. }
  1988. err = talk_to_netback(np->xbdev, np);
  1989. if (err)
  1990. return err;
  1991. if (np->netback_has_xdp_headroom)
  1992. pr_info("backend supports XDP headroom\n");
  1993. if (np->bounce)
  1994. dev_info(&np->xbdev->dev,
  1995. "bouncing transmitted data to zeroed pages\n");
  1996. /* talk_to_netback() sets the correct number of queues */
  1997. num_queues = dev->real_num_tx_queues;
  1998. if (dev->reg_state == NETREG_UNINITIALIZED) {
  1999. err = register_netdev(dev);
  2000. if (err) {
  2001. pr_warn("%s: register_netdev err=%d\n", __func__, err);
  2002. device_unregister(&np->xbdev->dev);
  2003. return err;
  2004. }
  2005. }
  2006. rtnl_lock();
  2007. netdev_update_features(dev);
  2008. rtnl_unlock();
  2009. /*
  2010. * All public and private state should now be sane. Get
  2011. * ready to start sending and receiving packets and give the driver
  2012. * domain a kick because we've probably just requeued some
  2013. * packets.
  2014. */
  2015. netif_tx_lock_bh(np->netdev);
  2016. netif_device_attach(np->netdev);
  2017. netif_tx_unlock_bh(np->netdev);
  2018. netif_carrier_on(np->netdev);
  2019. for (j = 0; j < num_queues; ++j) {
  2020. queue = &np->queues[j];
  2021. notify_remote_via_irq(queue->tx_irq);
  2022. if (queue->tx_irq != queue->rx_irq)
  2023. notify_remote_via_irq(queue->rx_irq);
  2024. spin_lock_bh(&queue->rx_lock);
  2025. xennet_alloc_rx_buffers(queue);
  2026. spin_unlock_bh(&queue->rx_lock);
  2027. }
  2028. return 0;
  2029. }
  2030. /*
  2031. * Callback received when the backend's state changes.
  2032. */
  2033. static void netback_changed(struct xenbus_device *dev,
  2034. enum xenbus_state backend_state)
  2035. {
  2036. struct netfront_info *np = dev_get_drvdata(&dev->dev);
  2037. struct net_device *netdev = np->netdev;
  2038. dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
  2039. wake_up_all(&module_wq);
  2040. switch (backend_state) {
  2041. case XenbusStateInitialising:
  2042. case XenbusStateInitialised:
  2043. case XenbusStateReconfiguring:
  2044. case XenbusStateReconfigured:
  2045. case XenbusStateUnknown:
  2046. break;
  2047. case XenbusStateInitWait:
  2048. if (dev->state != XenbusStateInitialising)
  2049. break;
  2050. if (xennet_connect(netdev) != 0)
  2051. break;
  2052. xenbus_switch_state(dev, XenbusStateConnected);
  2053. break;
  2054. case XenbusStateConnected:
  2055. netdev_notify_peers(netdev);
  2056. break;
  2057. case XenbusStateClosed:
  2058. if (dev->state == XenbusStateClosed)
  2059. break;
  2060. fallthrough; /* Missed the backend's CLOSING state */
  2061. case XenbusStateClosing:
  2062. xenbus_frontend_closed(dev);
  2063. break;
  2064. }
  2065. }
  2066. static const struct xennet_stat {
  2067. char name[ETH_GSTRING_LEN];
  2068. u16 offset;
  2069. } xennet_stats[] = {
  2070. {
  2071. "rx_gso_checksum_fixup",
  2072. offsetof(struct netfront_info, rx_gso_checksum_fixup)
  2073. },
  2074. };
  2075. static int xennet_get_sset_count(struct net_device *dev, int string_set)
  2076. {
  2077. switch (string_set) {
  2078. case ETH_SS_STATS:
  2079. return ARRAY_SIZE(xennet_stats);
  2080. default:
  2081. return -EINVAL;
  2082. }
  2083. }
  2084. static void xennet_get_ethtool_stats(struct net_device *dev,
  2085. struct ethtool_stats *stats, u64 * data)
  2086. {
  2087. void *np = netdev_priv(dev);
  2088. int i;
  2089. for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
  2090. data[i] = atomic_read((atomic_t *)(np + xennet_stats[i].offset));
  2091. }
  2092. static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 * data)
  2093. {
  2094. int i;
  2095. switch (stringset) {
  2096. case ETH_SS_STATS:
  2097. for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
  2098. memcpy(data + i * ETH_GSTRING_LEN,
  2099. xennet_stats[i].name, ETH_GSTRING_LEN);
  2100. break;
  2101. }
  2102. }
  2103. static const struct ethtool_ops xennet_ethtool_ops =
  2104. {
  2105. .get_link = ethtool_op_get_link,
  2106. .get_sset_count = xennet_get_sset_count,
  2107. .get_ethtool_stats = xennet_get_ethtool_stats,
  2108. .get_strings = xennet_get_strings,
  2109. .get_ts_info = ethtool_op_get_ts_info,
  2110. };
  2111. #ifdef CONFIG_SYSFS
  2112. static ssize_t show_rxbuf(struct device *dev,
  2113. struct device_attribute *attr, char *buf)
  2114. {
  2115. return sprintf(buf, "%lu\n", NET_RX_RING_SIZE);
  2116. }
  2117. static ssize_t store_rxbuf(struct device *dev,
  2118. struct device_attribute *attr,
  2119. const char *buf, size_t len)
  2120. {
  2121. char *endp;
  2122. if (!capable(CAP_NET_ADMIN))
  2123. return -EPERM;
  2124. simple_strtoul(buf, &endp, 0);
  2125. if (endp == buf)
  2126. return -EBADMSG;
  2127. /* rxbuf_min and rxbuf_max are no longer configurable. */
  2128. return len;
  2129. }
  2130. static DEVICE_ATTR(rxbuf_min, 0644, show_rxbuf, store_rxbuf);
  2131. static DEVICE_ATTR(rxbuf_max, 0644, show_rxbuf, store_rxbuf);
  2132. static DEVICE_ATTR(rxbuf_cur, 0444, show_rxbuf, NULL);
  2133. static struct attribute *xennet_dev_attrs[] = {
  2134. &dev_attr_rxbuf_min.attr,
  2135. &dev_attr_rxbuf_max.attr,
  2136. &dev_attr_rxbuf_cur.attr,
  2137. NULL
  2138. };
  2139. static const struct attribute_group xennet_dev_group = {
  2140. .attrs = xennet_dev_attrs
  2141. };
  2142. #endif /* CONFIG_SYSFS */
  2143. static void xennet_bus_close(struct xenbus_device *dev)
  2144. {
  2145. int ret;
  2146. if (xenbus_read_driver_state(dev, dev->otherend) == XenbusStateClosed)
  2147. return;
  2148. do {
  2149. xenbus_switch_state(dev, XenbusStateClosing);
  2150. ret = wait_event_timeout(module_wq,
  2151. xenbus_read_driver_state(dev, dev->otherend) ==
  2152. XenbusStateClosing ||
  2153. xenbus_read_driver_state(dev, dev->otherend) ==
  2154. XenbusStateClosed ||
  2155. xenbus_read_driver_state(dev, dev->otherend) ==
  2156. XenbusStateUnknown,
  2157. XENNET_TIMEOUT);
  2158. } while (!ret);
  2159. if (xenbus_read_driver_state(dev, dev->otherend) == XenbusStateClosed)
  2160. return;
  2161. do {
  2162. xenbus_switch_state(dev, XenbusStateClosed);
  2163. ret = wait_event_timeout(module_wq,
  2164. xenbus_read_driver_state(dev, dev->otherend) ==
  2165. XenbusStateClosed ||
  2166. xenbus_read_driver_state(dev, dev->otherend) ==
  2167. XenbusStateUnknown,
  2168. XENNET_TIMEOUT);
  2169. } while (!ret);
  2170. }
  2171. static void xennet_remove(struct xenbus_device *dev)
  2172. {
  2173. struct netfront_info *info = dev_get_drvdata(&dev->dev);
  2174. xennet_bus_close(dev);
  2175. xennet_disconnect_backend(info);
  2176. if (info->netdev->reg_state == NETREG_REGISTERED)
  2177. unregister_netdev(info->netdev);
  2178. if (info->queues) {
  2179. rtnl_lock();
  2180. xennet_destroy_queues(info);
  2181. rtnl_unlock();
  2182. }
  2183. xennet_free_netdev(info->netdev);
  2184. }
  2185. static const struct xenbus_device_id netfront_ids[] = {
  2186. { "vif" },
  2187. { "" }
  2188. };
  2189. static struct xenbus_driver netfront_driver = {
  2190. .ids = netfront_ids,
  2191. .probe = netfront_probe,
  2192. .remove = xennet_remove,
  2193. .resume = netfront_resume,
  2194. .otherend_changed = netback_changed,
  2195. };
  2196. static int __init netif_init(void)
  2197. {
  2198. if (!xen_domain())
  2199. return -ENODEV;
  2200. if (!xen_has_pv_nic_devices())
  2201. return -ENODEV;
  2202. pr_info("Initialising Xen virtual ethernet driver\n");
  2203. /* Allow the number of queues to match the number of CPUs, but not exceed
  2204. * the maximum limit. If the user has not specified a value, the default
  2205. * maximum limit is 8.
  2206. */
  2207. if (xennet_max_queues == 0)
  2208. xennet_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
  2209. num_online_cpus());
  2210. return xenbus_register_frontend(&netfront_driver);
  2211. }
  2212. module_init(netif_init);
  2213. static void __exit netif_exit(void)
  2214. {
  2215. xenbus_unregister_driver(&netfront_driver);
  2216. }
  2217. module_exit(netif_exit);
  2218. MODULE_DESCRIPTION("Xen virtual network device frontend");
  2219. MODULE_LICENSE("GPL");
  2220. MODULE_ALIAS("xen:vif");
  2221. MODULE_ALIAS("xennet");