vmbus_bufring.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. // SPDX-License-Identifier: BSD-3-Clause
  2. /*
  3. * Copyright (c) 2009-2012,2016,2023 Microsoft Corp.
  4. * Copyright (c) 2012 NetApp Inc.
  5. * Copyright (c) 2012 Citrix Inc.
  6. * All rights reserved.
  7. */
  8. #include <errno.h>
  9. #include <fcntl.h>
  10. #include <emmintrin.h>
  11. #include <linux/limits.h>
  12. #include <stdbool.h>
  13. #include <stdint.h>
  14. #include <stdio.h>
  15. #include <string.h>
  16. #include <sys/mman.h>
  17. #include <sys/uio.h>
  18. #include <unistd.h>
  19. #include "vmbus_bufring.h"
  /**
   * Compiler barrier.
   *
   * Guarantees that operation reordering does not occur at compile time
   * for operations directly before and after the barrier. The asm body is
   * empty, so no instruction is emitted; only the "memory" clobber matters.
   * Spelled with __asm__/__volatile__ so it also builds in strict ISO modes.
   */
  #define rte_compiler_barrier() ({ __asm__ __volatile__ ("" : : : "memory"); })

  /* All-ones 64-bit value. */
  #define VMBUS_RQST_ERROR 0xFFFFFFFFFFFFFFFF

  /**
   * Round val UP to the next multiple of align.
   *
   * align must be a power of two. The result has the type of val. val is
   * evaluated once and align twice, so align must have no side effects.
   *
   * Rounding up (rather than down) guarantees the result is never smaller
   * than val, so "ALIGN(len, n) - len" is a valid, non-negative pad length.
   */
  #define ALIGN(val, align) \
      ((__typeof__(val))(((val) + ((__typeof__(val))(align) - 1)) & \
                         ~((__typeof__(val))(align) - 1)))
  /*
   * Create a shared read/write mapping of 2 * size bytes from offset 0 of
   * the file referred to by *fd.
   *
   * @fd:   pointer to an open file descriptor
   * @size: base size in bytes; the mapping is twice this long
   *        (presumably one ring per direction - confirm against the caller)
   *
   * Returns the mapped address, or NULL on failure. Invalid arguments set
   * errno to EINVAL; otherwise errno is left as mmap() reported it.
   */
  void *vmbus_uio_map(int *fd, int size)
  {
      void *map;

      /* A non-positive size would become a huge length once converted to size_t. */
      if (fd == NULL || size <= 0) {
          errno = EINVAL;
          return NULL;
      }

      /* Widen before doubling so that 2 * size cannot overflow int. */
      map = mmap(NULL, (size_t)size * 2, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
      if (map == MAP_FAILED)
          return NULL;

      return map;
  }
  /*
   * Advance a bufring index by inc, wrapping around at sz.
   *
   * Only a single wrap is handled: sz is subtracted at most once.
   */
  static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz)
  {
      const uint32_t next = idx + inc;

      return (next >= sz) ? next - sz : next;
  }
  45. void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen)
  46. {
  47. br->vbr = buf;
  48. br->windex = br->vbr->windex;
  49. br->dsize = blen - sizeof(struct vmbus_bufring);
  50. }
  /*
   * Full memory barrier between CPUs.
   *
   * On x86-64 this is a locked add of 0 to a dummy slot 128 bytes below the
   * stack pointer: the stored value is unchanged and the instruction exists
   * only for its ordering effect. Other targets fall back to the compiler's
   * sequentially consistent fence.
   */
  static inline __attribute__((always_inline)) void
  rte_smp_mb(void)
  {
  #if defined(__x86_64__)
      __asm__ __volatile__("lock addl $0, -128(%%rsp); " ::: "memory");
  #else
      __atomic_thread_fence(__ATOMIC_SEQ_CST);
  #endif
  }
  /*
   * Atomic 32-bit compare-and-set.
   *
   * If *dst equals exp, store src into *dst.
   *
   * @dst: location to update
   * @exp: value *dst is expected to hold
   * @src: value to store on a match
   *
   * Returns non-zero when the store happened, 0 when *dst did not match exp.
   *
   * Implemented with the compiler's sequentially consistent compare-exchange
   * builtin rather than hand-written cmpxchg, so it is not tied to x86.
   */
  static inline int
  rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
  {
      /*
       * On failure the builtin writes the observed value back into exp;
       * exp is a by-value copy, so the caller never sees that.
       */
      return __atomic_compare_exchange_n(dst, &exp, src, 0,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  }
  71. static inline uint32_t
  72. vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex,
  73. const void *src0, uint32_t cplen)
  74. {
  75. uint8_t *br_data = tbr->vbr->data;
  76. uint32_t br_dsize = tbr->dsize;
  77. const uint8_t *src = src0;
  78. /* XXX use double mapping like Linux kernel? */
  79. if (cplen > br_dsize - windex) {
  80. uint32_t fraglen = br_dsize - windex;
  81. /* Wrap-around detected */
  82. memcpy(br_data + windex, src, fraglen);
  83. memcpy(br_data, src + fraglen, cplen - fraglen);
  84. } else {
  85. memcpy(br_data + windex, src, cplen);
  86. }
  87. return vmbus_br_idxinc(windex, cplen, br_dsize);
  88. }
  89. /*
  90. * Write scattered channel packet to TX bufring.
  91. *
  92. * The offset of this channel packet is written as a 64bits value
  93. * immediately after this channel packet.
  94. *
  95. * The write goes through three stages:
  96. * 1. Reserve space in ring buffer for the new data.
  97. * Writer atomically moves priv_write_index.
  98. * 2. Copy the new data into the ring.
  99. * 3. Update the tail of the ring (visible to host) that indicates
  100. * next read location. Writer updates write_index
  101. */
  102. static int
  103. vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen)
  104. {
  105. struct vmbus_bufring *vbr = tbr->vbr;
  106. uint32_t ring_size = tbr->dsize;
  107. uint32_t old_windex, next_windex, windex, total;
  108. uint64_t save_windex;
  109. int i;
  110. total = 0;
  111. for (i = 0; i < iovlen; i++)
  112. total += iov[i].iov_len;
  113. total += sizeof(save_windex);
  114. /* Reserve space in ring */
  115. do {
  116. uint32_t avail;
  117. /* Get current free location */
  118. old_windex = tbr->windex;
  119. /* Prevent compiler reordering this with calculation */
  120. rte_compiler_barrier();
  121. avail = vmbus_br_availwrite(tbr, old_windex);
  122. /* If not enough space in ring, then tell caller. */
  123. if (avail <= total)
  124. return -EAGAIN;
  125. next_windex = vmbus_br_idxinc(old_windex, total, ring_size);
  126. /* Atomic update of next write_index for other threads */
  127. } while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));
  128. /* Space from old..new is now reserved */
  129. windex = old_windex;
  130. for (i = 0; i < iovlen; i++)
  131. windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len);
  132. /* Set the offset of the current channel packet. */
  133. save_windex = ((uint64_t)old_windex) << 32;
  134. windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
  135. sizeof(save_windex));
  136. /* The region reserved should match region used */
  137. if (windex != next_windex)
  138. return -EINVAL;
  139. /* Ensure that data is available before updating host index */
  140. rte_compiler_barrier();
  141. /* Checkin for our reservation. wait for our turn to update host */
  142. while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
  143. _mm_pause();
  144. return 0;
  145. }
  146. int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data,
  147. uint32_t dlen, uint32_t flags)
  148. {
  149. struct vmbus_chanpkt pkt;
  150. unsigned int pktlen, pad_pktlen;
  151. const uint32_t hlen = sizeof(pkt);
  152. uint64_t pad = 0;
  153. struct iovec iov[3];
  154. int error;
  155. pktlen = hlen + dlen;
  156. pad_pktlen = ALIGN(pktlen, sizeof(uint64_t));
  157. pkt.hdr.type = type;
  158. pkt.hdr.flags = flags;
  159. pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
  160. pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
  161. pkt.hdr.xactid = VMBUS_RQST_ERROR;
  162. iov[0].iov_base = &pkt;
  163. iov[0].iov_len = hlen;
  164. iov[1].iov_base = data;
  165. iov[1].iov_len = dlen;
  166. iov[2].iov_base = &pad;
  167. iov[2].iov_len = pad_pktlen - pktlen;
  168. error = vmbus_txbr_write(txbr, iov, 3);
  169. return error;
  170. }
  171. static inline uint32_t
  172. vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex,
  173. void *dst0, size_t cplen)
  174. {
  175. const uint8_t *br_data = rbr->vbr->data;
  176. uint32_t br_dsize = rbr->dsize;
  177. uint8_t *dst = dst0;
  178. if (cplen > br_dsize - rindex) {
  179. uint32_t fraglen = br_dsize - rindex;
  180. /* Wrap-around detected. */
  181. memcpy(dst, br_data + rindex, fraglen);
  182. memcpy(dst + fraglen, br_data, cplen - fraglen);
  183. } else {
  184. memcpy(dst, br_data + rindex, cplen);
  185. }
  186. return vmbus_br_idxinc(rindex, cplen, br_dsize);
  187. }
  188. /* Copy data from receive ring but don't change index */
  189. static int
  190. vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen)
  191. {
  192. uint32_t avail;
  193. /*
  194. * The requested data and the 64bits channel packet
  195. * offset should be there at least.
  196. */
  197. avail = vmbus_br_availread(rbr);
  198. if (avail < dlen + sizeof(uint64_t))
  199. return -EAGAIN;
  200. vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen);
  201. return 0;
  202. }
  203. /*
  204. * Copy data from receive ring and change index
  205. * NOTE:
  206. * We assume (dlen + skip) == sizeof(channel packet).
  207. */
  208. static int
  209. vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip)
  210. {
  211. struct vmbus_bufring *vbr = rbr->vbr;
  212. uint32_t br_dsize = rbr->dsize;
  213. uint32_t rindex;
  214. if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t))
  215. return -EAGAIN;
  216. /* Record where host was when we started read (for debug) */
  217. rbr->windex = rbr->vbr->windex;
  218. /*
  219. * Copy channel packet from RX bufring.
  220. */
  221. rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize);
  222. rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);
  223. /*
  224. * Discard this channel packet's 64bits offset, which is useless to us.
  225. */
  226. rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize);
  227. /* Update the read index _after_ the channel packet is fetched. */
  228. rte_compiler_barrier();
  229. vbr->rindex = rindex;
  230. return 0;
  231. }
  232. int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr,
  233. void *data, uint32_t *len)
  234. {
  235. struct vmbus_chanpkt_hdr pkt;
  236. uint32_t dlen, bufferlen = *len;
  237. int error;
  238. error = vmbus_rxbr_peek(rxbr, &pkt, sizeof(pkt));
  239. if (error)
  240. return error;
  241. if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN))
  242. /* XXX this channel is dead actually. */
  243. return -EIO;
  244. if (unlikely(pkt.hlen > pkt.tlen))
  245. return -EIO;
  246. /* Length are in quad words */
  247. dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
  248. *len = dlen;
  249. /* If caller buffer is not large enough */
  250. if (unlikely(dlen > bufferlen))
  251. return -ENOBUFS;
  252. /* Read data and skip packet header */
  253. error = vmbus_rxbr_read(rxbr, data, dlen, 0);
  254. if (error)
  255. return error;
  256. /* Return the number of bytes read */
  257. return dlen + sizeof(uint64_t);
  258. }