virtio_transport.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * virtio transport for vsock
  4. *
  5. * Copyright (C) 2013-2015 Red Hat, Inc.
  6. * Author: Asias He <asias@redhat.com>
  7. * Stefan Hajnoczi <stefanha@redhat.com>
  8. *
  9. * Some of the code is taken from Gerd Hoffmann <kraxel@redhat.com>'s
  10. * early virtio-vsock proof-of-concept bits.
  11. */
  12. #include <linux/spinlock.h>
  13. #include <linux/module.h>
  14. #include <linux/list.h>
  15. #include <linux/atomic.h>
  16. #include <linux/virtio.h>
  17. #include <linux/virtio_ids.h>
  18. #include <linux/virtio_config.h>
  19. #include <linux/virtio_vsock.h>
  20. #include <linux/dma-mapping.h>
  21. #include <net/sock.h>
  22. #include <linux/mutex.h>
  23. #include <net/af_vsock.h>
  24. static struct workqueue_struct *virtio_vsock_workqueue;
  25. static struct virtio_vsock __rcu *the_virtio_vsock;
  26. static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
  27. static struct virtio_transport virtio_transport; /* forward declaration */
  28. struct virtio_vsock {
  29. struct virtio_device *vdev;
  30. struct virtqueue *vqs[VSOCK_VQ_MAX];
  31. /* Virtqueue processing is deferred to a workqueue */
  32. struct work_struct tx_work;
  33. struct work_struct rx_work;
  34. struct work_struct event_work;
  35. /* The following fields are protected by tx_lock. vqs[VSOCK_VQ_TX]
  36. * must be accessed with tx_lock held.
  37. */
  38. struct mutex tx_lock;
  39. bool tx_run;
  40. struct work_struct send_pkt_work;
  41. struct sk_buff_head send_pkt_queue;
  42. atomic_t queued_replies;
  43. /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX]
  44. * must be accessed with rx_lock held.
  45. */
  46. struct mutex rx_lock;
  47. bool rx_run;
  48. int rx_buf_nr;
  49. int rx_buf_max_nr;
  50. u32 guest_cid;
  51. bool seqpacket_allow;
  52. /* These fields are used only in tx path in function
  53. * 'virtio_transport_send_pkt_work()', so to save
  54. * stack space in it, place both of them here. Each
  55. * pointer from 'out_sgs' points to the corresponding
  56. * element in 'out_bufs' - this is initialized in
  57. * 'virtio_vsock_probe()'. Both fields are protected
  58. * by 'tx_lock'. +1 is needed for packet header.
  59. */
  60. struct scatterlist *out_sgs[MAX_SKB_FRAGS + 1];
  61. struct scatterlist out_bufs[MAX_SKB_FRAGS + 1];
  62. /* The following fields are protected by event_lock.
  63. * vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held.
  64. */
  65. struct mutex event_lock;
  66. bool event_run;
  67. __dma_from_device_group_begin();
  68. struct virtio_vsock_event event_list[8];
  69. __dma_from_device_group_end();
  70. };
  71. static u32 virtio_transport_get_local_cid(void)
  72. {
  73. struct virtio_vsock *vsock;
  74. u32 ret;
  75. rcu_read_lock();
  76. vsock = rcu_dereference(the_virtio_vsock);
  77. if (!vsock) {
  78. ret = VMADDR_CID_ANY;
  79. goto out_rcu;
  80. }
  81. ret = vsock->guest_cid;
  82. out_rcu:
  83. rcu_read_unlock();
  84. return ret;
  85. }
  86. /* Caller need to hold vsock->tx_lock on vq */
  87. static int virtio_transport_send_skb(struct sk_buff *skb, struct virtqueue *vq,
  88. struct virtio_vsock *vsock, gfp_t gfp)
  89. {
  90. int ret, in_sg = 0, out_sg = 0;
  91. struct scatterlist **sgs;
  92. sgs = vsock->out_sgs;
  93. sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb),
  94. sizeof(*virtio_vsock_hdr(skb)));
  95. out_sg++;
  96. if (!skb_is_nonlinear(skb)) {
  97. if (skb->len > 0) {
  98. sg_init_one(sgs[out_sg], skb->data, skb->len);
  99. out_sg++;
  100. }
  101. } else {
  102. struct skb_shared_info *si;
  103. int i;
  104. /* If skb is nonlinear, then its buffer must contain
  105. * only header and nothing more. Data is stored in
  106. * the fragged part.
  107. */
  108. WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb)));
  109. si = skb_shinfo(skb);
  110. for (i = 0; i < si->nr_frags; i++) {
  111. skb_frag_t *skb_frag = &si->frags[i];
  112. void *va;
  113. /* We will use 'page_to_virt()' for the userspace page
  114. * here, because virtio or dma-mapping layers will call
  115. * 'virt_to_phys()' later to fill the buffer descriptor.
  116. * We don't touch memory at "virtual" address of this page.
  117. */
  118. va = page_to_virt(skb_frag_page(skb_frag));
  119. sg_init_one(sgs[out_sg],
  120. va + skb_frag_off(skb_frag),
  121. skb_frag_size(skb_frag));
  122. out_sg++;
  123. }
  124. }
  125. ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, gfp);
  126. /* Usually this means that there is no more space available in
  127. * the vq
  128. */
  129. if (ret < 0)
  130. return ret;
  131. virtio_transport_deliver_tap_pkt(skb);
  132. return 0;
  133. }
  134. static void
  135. virtio_transport_send_pkt_work(struct work_struct *work)
  136. {
  137. struct virtio_vsock *vsock =
  138. container_of(work, struct virtio_vsock, send_pkt_work);
  139. struct virtqueue *vq;
  140. bool added = false;
  141. bool restart_rx = false;
  142. mutex_lock(&vsock->tx_lock);
  143. if (!vsock->tx_run)
  144. goto out;
  145. vq = vsock->vqs[VSOCK_VQ_TX];
  146. for (;;) {
  147. struct sk_buff *skb;
  148. bool reply;
  149. int ret;
  150. skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);
  151. if (!skb)
  152. break;
  153. reply = virtio_vsock_skb_reply(skb);
  154. ret = virtio_transport_send_skb(skb, vq, vsock, GFP_KERNEL);
  155. if (ret < 0) {
  156. virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
  157. break;
  158. }
  159. if (reply) {
  160. struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
  161. int val;
  162. val = atomic_dec_return(&vsock->queued_replies);
  163. /* Do we now have resources to resume rx processing? */
  164. if (val + 1 == virtqueue_get_vring_size(rx_vq))
  165. restart_rx = true;
  166. }
  167. added = true;
  168. }
  169. if (added)
  170. virtqueue_kick(vq);
  171. out:
  172. mutex_unlock(&vsock->tx_lock);
  173. if (restart_rx)
  174. queue_work(virtio_vsock_workqueue, &vsock->rx_work);
  175. }
  176. /* Caller need to hold RCU for vsock.
  177. * Returns 0 if the packet is successfully put on the vq.
  178. */
  179. static int virtio_transport_send_skb_fast_path(struct virtio_vsock *vsock, struct sk_buff *skb)
  180. {
  181. struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX];
  182. int ret;
  183. /* Inside RCU, can't sleep! */
  184. ret = mutex_trylock(&vsock->tx_lock);
  185. if (unlikely(ret == 0))
  186. return -EBUSY;
  187. ret = virtio_transport_send_skb(skb, vq, vsock, GFP_ATOMIC);
  188. if (ret == 0)
  189. virtqueue_kick(vq);
  190. mutex_unlock(&vsock->tx_lock);
  191. return ret;
  192. }
  193. static int
  194. virtio_transport_send_pkt(struct sk_buff *skb, struct net *net)
  195. {
  196. struct virtio_vsock_hdr *hdr;
  197. struct virtio_vsock *vsock;
  198. int len = skb->len;
  199. hdr = virtio_vsock_hdr(skb);
  200. rcu_read_lock();
  201. vsock = rcu_dereference(the_virtio_vsock);
  202. if (!vsock) {
  203. kfree_skb(skb);
  204. len = -ENODEV;
  205. goto out_rcu;
  206. }
  207. if (le64_to_cpu(hdr->dst_cid) == vsock->guest_cid) {
  208. kfree_skb(skb);
  209. len = -ENODEV;
  210. goto out_rcu;
  211. }
  212. /* If send_pkt_queue is empty, we can safely bypass this queue
  213. * because packet order is maintained and (try) to put the packet
  214. * on the virtqueue using virtio_transport_send_skb_fast_path.
  215. * If this fails we simply put the packet on the intermediate
  216. * queue and schedule the worker.
  217. */
  218. if (!skb_queue_empty_lockless(&vsock->send_pkt_queue) ||
  219. virtio_transport_send_skb_fast_path(vsock, skb)) {
  220. if (virtio_vsock_skb_reply(skb))
  221. atomic_inc(&vsock->queued_replies);
  222. virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
  223. queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
  224. }
  225. out_rcu:
  226. rcu_read_unlock();
  227. return len;
  228. }
  229. static int
  230. virtio_transport_cancel_pkt(struct vsock_sock *vsk)
  231. {
  232. struct virtio_vsock *vsock;
  233. int cnt = 0, ret;
  234. rcu_read_lock();
  235. vsock = rcu_dereference(the_virtio_vsock);
  236. if (!vsock) {
  237. ret = -ENODEV;
  238. goto out_rcu;
  239. }
  240. cnt = virtio_transport_purge_skbs(vsk, &vsock->send_pkt_queue);
  241. if (cnt) {
  242. struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
  243. int new_cnt;
  244. new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
  245. if (new_cnt + cnt >= virtqueue_get_vring_size(rx_vq) &&
  246. new_cnt < virtqueue_get_vring_size(rx_vq))
  247. queue_work(virtio_vsock_workqueue, &vsock->rx_work);
  248. }
  249. ret = 0;
  250. out_rcu:
  251. rcu_read_unlock();
  252. return ret;
  253. }
  254. static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
  255. {
  256. int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
  257. struct scatterlist pkt, *p;
  258. struct virtqueue *vq;
  259. struct sk_buff *skb;
  260. int ret;
  261. vq = vsock->vqs[VSOCK_VQ_RX];
  262. do {
  263. skb = virtio_vsock_alloc_linear_skb(total_len, GFP_KERNEL);
  264. if (!skb)
  265. break;
  266. memset(skb->head, 0, VIRTIO_VSOCK_SKB_HEADROOM);
  267. sg_init_one(&pkt, virtio_vsock_hdr(skb), total_len);
  268. p = &pkt;
  269. ret = virtqueue_add_sgs(vq, &p, 0, 1, skb, GFP_KERNEL);
  270. if (ret < 0) {
  271. kfree_skb(skb);
  272. break;
  273. }
  274. vsock->rx_buf_nr++;
  275. } while (vq->num_free);
  276. if (vsock->rx_buf_nr > vsock->rx_buf_max_nr)
  277. vsock->rx_buf_max_nr = vsock->rx_buf_nr;
  278. virtqueue_kick(vq);
  279. }
  280. static void virtio_transport_tx_work(struct work_struct *work)
  281. {
  282. struct virtio_vsock *vsock =
  283. container_of(work, struct virtio_vsock, tx_work);
  284. struct virtqueue *vq;
  285. bool added = false;
  286. vq = vsock->vqs[VSOCK_VQ_TX];
  287. mutex_lock(&vsock->tx_lock);
  288. if (!vsock->tx_run)
  289. goto out;
  290. do {
  291. struct sk_buff *skb;
  292. unsigned int len;
  293. virtqueue_disable_cb(vq);
  294. while ((skb = virtqueue_get_buf(vq, &len)) != NULL) {
  295. virtio_transport_consume_skb_sent(skb, true);
  296. added = true;
  297. }
  298. } while (!virtqueue_enable_cb(vq));
  299. out:
  300. mutex_unlock(&vsock->tx_lock);
  301. if (added)
  302. queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
  303. }
  304. /* Is there space left for replies to rx packets? */
  305. static bool virtio_transport_more_replies(struct virtio_vsock *vsock)
  306. {
  307. struct virtqueue *vq = vsock->vqs[VSOCK_VQ_RX];
  308. int val;
  309. smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
  310. val = atomic_read(&vsock->queued_replies);
  311. return val < virtqueue_get_vring_size(vq);
  312. }
  313. /* event_lock must be held */
  314. static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock,
  315. struct virtio_vsock_event *event)
  316. {
  317. struct scatterlist sg;
  318. struct virtqueue *vq;
  319. vq = vsock->vqs[VSOCK_VQ_EVENT];
  320. sg_init_one(&sg, event, sizeof(*event));
  321. return virtqueue_add_inbuf_cache_clean(vq, &sg, 1, event, GFP_KERNEL);
  322. }
  323. /* event_lock must be held */
  324. static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
  325. {
  326. size_t i;
  327. for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++) {
  328. struct virtio_vsock_event *event = &vsock->event_list[i];
  329. virtio_vsock_event_fill_one(vsock, event);
  330. }
  331. virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
  332. }
  333. static void virtio_vsock_reset_sock(struct sock *sk)
  334. {
  335. /* vmci_transport.c doesn't take sk_lock here either. At least we're
  336. * under vsock_table_lock so the sock cannot disappear while we're
  337. * executing.
  338. */
  339. sk->sk_state = TCP_CLOSE;
  340. sk->sk_err = ECONNRESET;
  341. sk_error_report(sk);
  342. }
  343. static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)
  344. {
  345. struct virtio_device *vdev = vsock->vdev;
  346. __le64 guest_cid;
  347. vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid),
  348. &guest_cid, sizeof(guest_cid));
  349. vsock->guest_cid = le64_to_cpu(guest_cid);
  350. }
  351. /* event_lock must be held */
  352. static void virtio_vsock_event_handle(struct virtio_vsock *vsock,
  353. struct virtio_vsock_event *event)
  354. {
  355. switch (le32_to_cpu(event->id)) {
  356. case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET:
  357. virtio_vsock_update_guest_cid(vsock);
  358. vsock_for_each_connected_socket(&virtio_transport.transport,
  359. virtio_vsock_reset_sock);
  360. break;
  361. }
  362. }
  363. static void virtio_transport_event_work(struct work_struct *work)
  364. {
  365. struct virtio_vsock *vsock =
  366. container_of(work, struct virtio_vsock, event_work);
  367. struct virtqueue *vq;
  368. vq = vsock->vqs[VSOCK_VQ_EVENT];
  369. mutex_lock(&vsock->event_lock);
  370. if (!vsock->event_run)
  371. goto out;
  372. do {
  373. struct virtio_vsock_event *event;
  374. unsigned int len;
  375. virtqueue_disable_cb(vq);
  376. while ((event = virtqueue_get_buf(vq, &len)) != NULL) {
  377. if (len == sizeof(*event))
  378. virtio_vsock_event_handle(vsock, event);
  379. virtio_vsock_event_fill_one(vsock, event);
  380. }
  381. } while (!virtqueue_enable_cb(vq));
  382. virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
  383. out:
  384. mutex_unlock(&vsock->event_lock);
  385. }
  386. static void virtio_vsock_event_done(struct virtqueue *vq)
  387. {
  388. struct virtio_vsock *vsock = vq->vdev->priv;
  389. if (!vsock)
  390. return;
  391. queue_work(virtio_vsock_workqueue, &vsock->event_work);
  392. }
  393. static void virtio_vsock_tx_done(struct virtqueue *vq)
  394. {
  395. struct virtio_vsock *vsock = vq->vdev->priv;
  396. if (!vsock)
  397. return;
  398. queue_work(virtio_vsock_workqueue, &vsock->tx_work);
  399. }
  400. static void virtio_vsock_rx_done(struct virtqueue *vq)
  401. {
  402. struct virtio_vsock *vsock = vq->vdev->priv;
  403. if (!vsock)
  404. return;
  405. queue_work(virtio_vsock_workqueue, &vsock->rx_work);
  406. }
  407. static bool virtio_transport_can_msgzerocopy(int bufs_num)
  408. {
  409. struct virtio_vsock *vsock;
  410. bool res = false;
  411. rcu_read_lock();
  412. vsock = rcu_dereference(the_virtio_vsock);
  413. if (vsock) {
  414. struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX];
  415. /* Check that tx queue is large enough to keep whole
  416. * data to send. This is needed, because when there is
  417. * not enough free space in the queue, current skb to
  418. * send will be reinserted to the head of tx list of
  419. * the socket to retry transmission later, so if skb
  420. * is bigger than whole queue, it will be reinserted
  421. * again and again, thus blocking other skbs to be sent.
  422. * Each page of the user provided buffer will be added
  423. * as a single buffer to the tx virtqueue, so compare
  424. * number of pages against maximum capacity of the queue.
  425. */
  426. if (bufs_num <= vq->num_max)
  427. res = true;
  428. }
  429. rcu_read_unlock();
  430. return res;
  431. }
  432. static bool virtio_transport_msgzerocopy_allow(void)
  433. {
  434. return true;
  435. }
  436. bool virtio_transport_stream_allow(struct vsock_sock *vsk, u32 cid, u32 port)
  437. {
  438. return vsock_net_mode_global(vsk);
  439. }
  440. static bool virtio_transport_seqpacket_allow(struct vsock_sock *vsk,
  441. u32 remote_cid);
  442. static struct virtio_transport virtio_transport = {
  443. .transport = {
  444. .module = THIS_MODULE,
  445. .get_local_cid = virtio_transport_get_local_cid,
  446. .init = virtio_transport_do_socket_init,
  447. .destruct = virtio_transport_destruct,
  448. .release = virtio_transport_release,
  449. .connect = virtio_transport_connect,
  450. .shutdown = virtio_transport_shutdown,
  451. .cancel_pkt = virtio_transport_cancel_pkt,
  452. .dgram_bind = virtio_transport_dgram_bind,
  453. .dgram_dequeue = virtio_transport_dgram_dequeue,
  454. .dgram_enqueue = virtio_transport_dgram_enqueue,
  455. .dgram_allow = virtio_transport_dgram_allow,
  456. .stream_dequeue = virtio_transport_stream_dequeue,
  457. .stream_enqueue = virtio_transport_stream_enqueue,
  458. .stream_has_data = virtio_transport_stream_has_data,
  459. .stream_has_space = virtio_transport_stream_has_space,
  460. .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
  461. .stream_is_active = virtio_transport_stream_is_active,
  462. .stream_allow = virtio_transport_stream_allow,
  463. .seqpacket_dequeue = virtio_transport_seqpacket_dequeue,
  464. .seqpacket_enqueue = virtio_transport_seqpacket_enqueue,
  465. .seqpacket_allow = virtio_transport_seqpacket_allow,
  466. .seqpacket_has_data = virtio_transport_seqpacket_has_data,
  467. .msgzerocopy_allow = virtio_transport_msgzerocopy_allow,
  468. .notify_poll_in = virtio_transport_notify_poll_in,
  469. .notify_poll_out = virtio_transport_notify_poll_out,
  470. .notify_recv_init = virtio_transport_notify_recv_init,
  471. .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
  472. .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
  473. .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
  474. .notify_send_init = virtio_transport_notify_send_init,
  475. .notify_send_pre_block = virtio_transport_notify_send_pre_block,
  476. .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
  477. .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
  478. .notify_buffer_size = virtio_transport_notify_buffer_size,
  479. .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
  480. .unsent_bytes = virtio_transport_unsent_bytes,
  481. .read_skb = virtio_transport_read_skb,
  482. },
  483. .send_pkt = virtio_transport_send_pkt,
  484. .can_msgzerocopy = virtio_transport_can_msgzerocopy,
  485. };
  486. static bool
  487. virtio_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid)
  488. {
  489. struct virtio_vsock *vsock;
  490. bool seqpacket_allow;
  491. if (!vsock_net_mode_global(vsk))
  492. return false;
  493. seqpacket_allow = false;
  494. rcu_read_lock();
  495. vsock = rcu_dereference(the_virtio_vsock);
  496. if (vsock)
  497. seqpacket_allow = vsock->seqpacket_allow;
  498. rcu_read_unlock();
  499. return seqpacket_allow;
  500. }
  501. static void virtio_transport_rx_work(struct work_struct *work)
  502. {
  503. struct virtio_vsock *vsock =
  504. container_of(work, struct virtio_vsock, rx_work);
  505. struct virtqueue *vq;
  506. vq = vsock->vqs[VSOCK_VQ_RX];
  507. mutex_lock(&vsock->rx_lock);
  508. if (!vsock->rx_run)
  509. goto out;
  510. do {
  511. virtqueue_disable_cb(vq);
  512. for (;;) {
  513. unsigned int len, payload_len;
  514. struct virtio_vsock_hdr *hdr;
  515. struct sk_buff *skb;
  516. if (!virtio_transport_more_replies(vsock)) {
  517. /* Stop rx until the device processes already
  518. * pending replies. Leave rx virtqueue
  519. * callbacks disabled.
  520. */
  521. goto out;
  522. }
  523. skb = virtqueue_get_buf(vq, &len);
  524. if (!skb)
  525. break;
  526. vsock->rx_buf_nr--;
  527. /* Drop short/long packets */
  528. if (unlikely(len < sizeof(*hdr) ||
  529. len > virtio_vsock_skb_len(skb))) {
  530. kfree_skb(skb);
  531. continue;
  532. }
  533. hdr = virtio_vsock_hdr(skb);
  534. payload_len = le32_to_cpu(hdr->len);
  535. if (unlikely(payload_len > len - sizeof(*hdr))) {
  536. kfree_skb(skb);
  537. continue;
  538. }
  539. if (payload_len)
  540. virtio_vsock_skb_put(skb, payload_len);
  541. virtio_transport_deliver_tap_pkt(skb);
  542. /* Force virtio-transport into global mode since it
  543. * does not yet support local-mode namespacing.
  544. */
  545. virtio_transport_recv_pkt(&virtio_transport, skb, NULL);
  546. }
  547. } while (!virtqueue_enable_cb(vq));
  548. out:
  549. if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2)
  550. virtio_vsock_rx_fill(vsock);
  551. mutex_unlock(&vsock->rx_lock);
  552. }
  553. static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
  554. {
  555. struct virtio_device *vdev = vsock->vdev;
  556. struct virtqueue_info vqs_info[] = {
  557. { "rx", virtio_vsock_rx_done },
  558. { "tx", virtio_vsock_tx_done },
  559. { "event", virtio_vsock_event_done },
  560. };
  561. int ret;
  562. mutex_lock(&vsock->rx_lock);
  563. vsock->rx_buf_nr = 0;
  564. vsock->rx_buf_max_nr = 0;
  565. mutex_unlock(&vsock->rx_lock);
  566. atomic_set(&vsock->queued_replies, 0);
  567. ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL);
  568. if (ret < 0)
  569. return ret;
  570. virtio_vsock_update_guest_cid(vsock);
  571. virtio_device_ready(vdev);
  572. return 0;
  573. }
  574. static void virtio_vsock_vqs_start(struct virtio_vsock *vsock)
  575. {
  576. mutex_lock(&vsock->tx_lock);
  577. vsock->tx_run = true;
  578. mutex_unlock(&vsock->tx_lock);
  579. mutex_lock(&vsock->rx_lock);
  580. virtio_vsock_rx_fill(vsock);
  581. vsock->rx_run = true;
  582. mutex_unlock(&vsock->rx_lock);
  583. mutex_lock(&vsock->event_lock);
  584. virtio_vsock_event_fill(vsock);
  585. vsock->event_run = true;
  586. mutex_unlock(&vsock->event_lock);
  587. /* virtio_transport_send_pkt() can queue packets once
  588. * the_virtio_vsock is set, but they won't be processed until
  589. * vsock->tx_run is set to true. We queue vsock->send_pkt_work
  590. * when initialization finishes to send those packets queued
  591. * earlier.
  592. * We don't need to queue the other workers (rx, event) because
  593. * as long as we don't fill the queues with empty buffers, the
  594. * host can't send us any notification.
  595. */
  596. queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
  597. }
  598. static void virtio_vsock_vqs_del(struct virtio_vsock *vsock)
  599. {
  600. struct virtio_device *vdev = vsock->vdev;
  601. struct sk_buff *skb;
  602. /* Reset all connected sockets when the VQs disappear */
  603. vsock_for_each_connected_socket(&virtio_transport.transport,
  604. virtio_vsock_reset_sock);
  605. /* Stop all work handlers to make sure no one is accessing the device,
  606. * so we can safely call virtio_reset_device().
  607. */
  608. mutex_lock(&vsock->rx_lock);
  609. vsock->rx_run = false;
  610. mutex_unlock(&vsock->rx_lock);
  611. mutex_lock(&vsock->tx_lock);
  612. vsock->tx_run = false;
  613. mutex_unlock(&vsock->tx_lock);
  614. mutex_lock(&vsock->event_lock);
  615. vsock->event_run = false;
  616. mutex_unlock(&vsock->event_lock);
  617. /* Flush all device writes and interrupts, device will not use any
  618. * more buffers.
  619. */
  620. virtio_reset_device(vdev);
  621. mutex_lock(&vsock->rx_lock);
  622. while ((skb = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX])))
  623. kfree_skb(skb);
  624. mutex_unlock(&vsock->rx_lock);
  625. mutex_lock(&vsock->tx_lock);
  626. while ((skb = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX])))
  627. kfree_skb(skb);
  628. mutex_unlock(&vsock->tx_lock);
  629. virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue);
  630. /* Delete virtqueues and flush outstanding callbacks if any */
  631. vdev->config->del_vqs(vdev);
  632. }
  633. static int virtio_vsock_probe(struct virtio_device *vdev)
  634. {
  635. struct virtio_vsock *vsock = NULL;
  636. int ret;
  637. int i;
  638. ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
  639. if (ret)
  640. return ret;
  641. /* Only one virtio-vsock device per guest is supported */
  642. if (rcu_dereference_protected(the_virtio_vsock,
  643. lockdep_is_held(&the_virtio_vsock_mutex))) {
  644. ret = -EBUSY;
  645. goto out;
  646. }
  647. vsock = kzalloc_obj(*vsock);
  648. if (!vsock) {
  649. ret = -ENOMEM;
  650. goto out;
  651. }
  652. vsock->vdev = vdev;
  653. mutex_init(&vsock->tx_lock);
  654. mutex_init(&vsock->rx_lock);
  655. mutex_init(&vsock->event_lock);
  656. skb_queue_head_init(&vsock->send_pkt_queue);
  657. INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
  658. INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
  659. INIT_WORK(&vsock->event_work, virtio_transport_event_work);
  660. INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
  661. if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
  662. vsock->seqpacket_allow = true;
  663. vdev->priv = vsock;
  664. ret = virtio_vsock_vqs_init(vsock);
  665. if (ret < 0)
  666. goto out;
  667. for (i = 0; i < ARRAY_SIZE(vsock->out_sgs); i++)
  668. vsock->out_sgs[i] = &vsock->out_bufs[i];
  669. rcu_assign_pointer(the_virtio_vsock, vsock);
  670. virtio_vsock_vqs_start(vsock);
  671. mutex_unlock(&the_virtio_vsock_mutex);
  672. return 0;
  673. out:
  674. kfree(vsock);
  675. mutex_unlock(&the_virtio_vsock_mutex);
  676. return ret;
  677. }
  678. static void virtio_vsock_remove(struct virtio_device *vdev)
  679. {
  680. struct virtio_vsock *vsock = vdev->priv;
  681. mutex_lock(&the_virtio_vsock_mutex);
  682. vdev->priv = NULL;
  683. rcu_assign_pointer(the_virtio_vsock, NULL);
  684. synchronize_rcu();
  685. virtio_vsock_vqs_del(vsock);
  686. /* Other works can be queued before 'config->del_vqs()', so we flush
  687. * all works before to free the vsock object to avoid use after free.
  688. */
  689. flush_work(&vsock->rx_work);
  690. flush_work(&vsock->tx_work);
  691. flush_work(&vsock->event_work);
  692. flush_work(&vsock->send_pkt_work);
  693. mutex_unlock(&the_virtio_vsock_mutex);
  694. kfree(vsock);
  695. }
  696. #ifdef CONFIG_PM_SLEEP
  697. static int virtio_vsock_freeze(struct virtio_device *vdev)
  698. {
  699. struct virtio_vsock *vsock = vdev->priv;
  700. mutex_lock(&the_virtio_vsock_mutex);
  701. rcu_assign_pointer(the_virtio_vsock, NULL);
  702. synchronize_rcu();
  703. virtio_vsock_vqs_del(vsock);
  704. mutex_unlock(&the_virtio_vsock_mutex);
  705. return 0;
  706. }
  707. static int virtio_vsock_restore(struct virtio_device *vdev)
  708. {
  709. struct virtio_vsock *vsock = vdev->priv;
  710. int ret;
  711. mutex_lock(&the_virtio_vsock_mutex);
  712. /* Only one virtio-vsock device per guest is supported */
  713. if (rcu_dereference_protected(the_virtio_vsock,
  714. lockdep_is_held(&the_virtio_vsock_mutex))) {
  715. ret = -EBUSY;
  716. goto out;
  717. }
  718. ret = virtio_vsock_vqs_init(vsock);
  719. if (ret < 0)
  720. goto out;
  721. rcu_assign_pointer(the_virtio_vsock, vsock);
  722. virtio_vsock_vqs_start(vsock);
  723. out:
  724. mutex_unlock(&the_virtio_vsock_mutex);
  725. return ret;
  726. }
  727. #endif /* CONFIG_PM_SLEEP */
  728. static struct virtio_device_id id_table[] = {
  729. { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID },
  730. { 0 },
  731. };
  732. static unsigned int features[] = {
  733. VIRTIO_VSOCK_F_SEQPACKET
  734. };
  735. static struct virtio_driver virtio_vsock_driver = {
  736. .feature_table = features,
  737. .feature_table_size = ARRAY_SIZE(features),
  738. .driver.name = KBUILD_MODNAME,
  739. .id_table = id_table,
  740. .probe = virtio_vsock_probe,
  741. .remove = virtio_vsock_remove,
  742. #ifdef CONFIG_PM_SLEEP
  743. .freeze = virtio_vsock_freeze,
  744. .restore = virtio_vsock_restore,
  745. #endif
  746. };
  747. static int __init virtio_vsock_init(void)
  748. {
  749. int ret;
  750. virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", WQ_PERCPU, 0);
  751. if (!virtio_vsock_workqueue)
  752. return -ENOMEM;
  753. ret = vsock_core_register(&virtio_transport.transport,
  754. VSOCK_TRANSPORT_F_G2H);
  755. if (ret)
  756. goto out_wq;
  757. ret = register_virtio_driver(&virtio_vsock_driver);
  758. if (ret)
  759. goto out_vci;
  760. return 0;
  761. out_vci:
  762. vsock_core_unregister(&virtio_transport.transport);
  763. out_wq:
  764. destroy_workqueue(virtio_vsock_workqueue);
  765. return ret;
  766. }
  767. static void __exit virtio_vsock_exit(void)
  768. {
  769. unregister_virtio_driver(&virtio_vsock_driver);
  770. vsock_core_unregister(&virtio_transport.transport);
  771. destroy_workqueue(virtio_vsock_workqueue);
  772. }
  773. module_init(virtio_vsock_init);
  774. module_exit(virtio_vsock_exit);
  775. MODULE_LICENSE("GPL v2");
  776. MODULE_AUTHOR("Asias He");
  777. MODULE_DESCRIPTION("virtio transport for vsock");
  778. MODULE_DEVICE_TABLE(virtio, id_table);