output.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /* RxRPC packet transmission
  3. *
  4. * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  5. * Written by David Howells (dhowells@redhat.com)
  6. */
  7. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  8. #include <linux/net.h>
  9. #include <linux/gfp.h>
  10. #include <linux/skbuff.h>
  11. #include <linux/export.h>
  12. #include <net/sock.h>
  13. #include <net/af_rxrpc.h>
  14. #include <net/udp.h>
  15. #include "ar-internal.h"
  16. extern int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
  17. ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len)
  18. {
  19. struct sockaddr *sa = msg->msg_name;
  20. struct sock *sk = socket->sk;
  21. if (IS_ENABLED(CONFIG_AF_RXRPC_IPV6)) {
  22. if (sa->sa_family == AF_INET6) {
  23. if (sk->sk_family != AF_INET6) {
  24. pr_warn("AF_INET6 address on AF_INET socket\n");
  25. return -ENOPROTOOPT;
  26. }
  27. return udpv6_sendmsg(sk, msg, len);
  28. }
  29. }
  30. return udp_sendmsg(sk, msg, len);
  31. }
/*
 * Buffer used to build a call-level ABORT packet: the wire header immediately
 * followed by the abort code as a big-endian 32-bit value.
 */
struct rxrpc_abort_buffer {
	struct rxrpc_wire_header whdr;
	__be32 abort_code;
};
/*
 * Payload appended after the wire header of a VERSION keepalive packet.  The
 * string is empty, so only its terminating NUL is transmitted.
 */
static const char rxrpc_keepalive_string[] = "";
  37. /*
  38. * Increase Tx backoff on transmission failure and clear it on success.
  39. */
  40. static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
  41. {
  42. if (ret < 0) {
  43. if (call->tx_backoff < 1000)
  44. call->tx_backoff += 100;
  45. } else {
  46. call->tx_backoff = 0;
  47. }
  48. }
  49. /*
  50. * Arrange for a keepalive ping a certain time after we last transmitted. This
  51. * lets the far side know we're still interested in this call and helps keep
  52. * the route through any intervening firewall open.
  53. *
  54. * Receiving a response to the ping will prevent the ->expect_rx_by timer from
  55. * expiring.
  56. */
  57. static void rxrpc_set_keepalive(struct rxrpc_call *call, ktime_t now)
  58. {
  59. ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo) / 6);
  60. call->keepalive_at = ktime_add(ktime_get_real(), delay);
  61. trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_keepalive);
  62. }
  63. /*
  64. * Allocate transmission buffers for an ACK and attach them to local->kv[].
  65. */
  66. static int rxrpc_alloc_ack(struct rxrpc_call *call, size_t sack_size)
  67. {
  68. struct rxrpc_wire_header *whdr;
  69. struct rxrpc_acktrailer *trailer;
  70. struct rxrpc_ackpacket *ack;
  71. struct kvec *kv = call->local->kvec;
  72. gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS;
  73. void *buf, *buf2 = NULL;
  74. u8 *filler;
  75. buf = page_frag_alloc(&call->local->tx_alloc,
  76. sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp);
  77. if (!buf)
  78. return -ENOMEM;
  79. if (sack_size) {
  80. buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp);
  81. if (!buf2) {
  82. page_frag_free(buf);
  83. return -ENOMEM;
  84. }
  85. }
  86. whdr = buf;
  87. ack = buf + sizeof(*whdr);
  88. filler = buf + sizeof(*whdr) + sizeof(*ack) + 1;
  89. trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3;
  90. kv[0].iov_base = whdr;
  91. kv[0].iov_len = sizeof(*whdr) + sizeof(*ack);
  92. kv[1].iov_base = buf2;
  93. kv[1].iov_len = sack_size;
  94. kv[2].iov_base = filler;
  95. kv[2].iov_len = 3 + sizeof(*trailer);
  96. return 3; /* Number of kvec[] used. */
  97. }
  98. static void rxrpc_free_ack(struct rxrpc_call *call)
  99. {
  100. page_frag_free(call->local->kvec[0].iov_base);
  101. if (call->local->kvec[1].iov_base)
  102. page_frag_free(call->local->kvec[1].iov_base);
  103. }
  104. /*
  105. * Record the beginning of an RTT probe.
  106. */
  107. static void rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
  108. ktime_t now, enum rxrpc_rtt_tx_trace why)
  109. {
  110. unsigned long avail = call->rtt_avail;
  111. int rtt_slot = 9;
  112. if (!(avail & RXRPC_CALL_RTT_AVAIL_MASK))
  113. goto no_slot;
  114. rtt_slot = __ffs(avail & RXRPC_CALL_RTT_AVAIL_MASK);
  115. if (!test_and_clear_bit(rtt_slot, &call->rtt_avail))
  116. goto no_slot;
  117. call->rtt_serial[rtt_slot] = serial;
  118. call->rtt_sent_at[rtt_slot] = now;
  119. smp_wmb(); /* Write data before avail bit */
  120. set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
  121. trace_rxrpc_rtt_tx(call, why, rtt_slot, serial);
  122. return;
  123. no_slot:
  124. trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial);
  125. }
  126. /*
  127. * Fill out an ACK packet.
  128. */
  129. static int rxrpc_fill_out_ack(struct rxrpc_call *call, int nr_kv, u8 ack_reason,
  130. rxrpc_serial_t serial_to_ack, rxrpc_serial_t *_ack_serial)
  131. {
  132. struct kvec *kv = call->local->kvec;
  133. struct rxrpc_wire_header *whdr = kv[0].iov_base;
  134. struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3;
  135. struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
  136. unsigned int qsize, sack, wrap, to, max_mtu, if_mtu;
  137. rxrpc_seq_t window, wtop;
  138. ktime_t now = ktime_get_real();
  139. int rsize;
  140. u8 *filler = kv[2].iov_base;
  141. u8 *sackp = kv[1].iov_base;
  142. rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);
  143. window = call->ackr_window;
  144. wtop = call->ackr_wtop;
  145. sack = call->ackr_sack_base % RXRPC_SACK_SIZE;
  146. *_ack_serial = rxrpc_get_next_serial(call->conn);
  147. whdr->epoch = htonl(call->conn->proto.epoch);
  148. whdr->cid = htonl(call->cid);
  149. whdr->callNumber = htonl(call->call_id);
  150. whdr->serial = htonl(*_ack_serial);
  151. whdr->seq = 0;
  152. whdr->type = RXRPC_PACKET_TYPE_ACK;
  153. whdr->flags = call->conn->out_clientflag | RXRPC_SLOW_START_OK;
  154. whdr->userStatus = 0;
  155. whdr->securityIndex = call->security_ix;
  156. whdr->_rsvd = 0;
  157. whdr->serviceId = htons(call->dest_srx.srx_service);
  158. ack->bufferSpace = 0;
  159. ack->maxSkew = 0;
  160. ack->firstPacket = htonl(window);
  161. ack->previousPacket = htonl(call->rx_highest_seq);
  162. ack->serial = htonl(serial_to_ack);
  163. ack->reason = ack_reason;
  164. ack->nAcks = wtop - window;
  165. filler[0] = 0;
  166. filler[1] = 0;
  167. filler[2] = 0;
  168. if (ack_reason == RXRPC_ACK_PING)
  169. whdr->flags |= RXRPC_REQUEST_ACK;
  170. if (after(wtop, window)) {
  171. kv[1].iov_len = ack->nAcks;
  172. wrap = RXRPC_SACK_SIZE - sack;
  173. to = umin(ack->nAcks, RXRPC_SACK_SIZE);
  174. if (sack + ack->nAcks <= RXRPC_SACK_SIZE) {
  175. memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks);
  176. } else {
  177. memcpy(sackp, call->ackr_sack_table + sack, wrap);
  178. memcpy(sackp + wrap, call->ackr_sack_table, to - wrap);
  179. }
  180. } else if (before(wtop, window)) {
  181. pr_warn("ack window backward %x %x", window, wtop);
  182. } else if (ack->reason == RXRPC_ACK_DELAY) {
  183. ack->reason = RXRPC_ACK_IDLE;
  184. }
  185. qsize = (window - 1) - call->rx_consumed;
  186. rsize = max_t(int, call->rx_winsize - qsize, 0);
  187. if_mtu = call->peer->if_mtu - call->peer->hdrsize;
  188. if (call->peer->ackr_adv_pmtud) {
  189. max_mtu = umax(call->peer->max_data, rxrpc_rx_mtu);
  190. } else {
  191. if_mtu = umin(if_mtu, 1444);
  192. max_mtu = if_mtu;
  193. }
  194. trailer->maxMTU = htonl(max_mtu);
  195. trailer->ifMTU = htonl(if_mtu);
  196. trailer->rwind = htonl(rsize);
  197. trailer->jumbo_max = 0; /* Advertise pmtu discovery */
  198. if (ack_reason == RXRPC_ACK_PING)
  199. rxrpc_begin_rtt_probe(call, *_ack_serial, now, rxrpc_rtt_tx_ping);
  200. if (whdr->flags & RXRPC_REQUEST_ACK)
  201. call->rtt_last_req = now;
  202. rxrpc_set_keepalive(call, now);
  203. return nr_kv;
  204. }
  205. /*
  206. * Transmit an ACK packet.
  207. */
  208. static void rxrpc_send_ack_packet(struct rxrpc_call *call, int nr_kv, size_t len,
  209. rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
  210. {
  211. struct kvec *kv = call->local->kvec;
  212. struct rxrpc_wire_header *whdr = kv[0].iov_base;
  213. struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3;
  214. struct rxrpc_connection *conn;
  215. struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
  216. struct msghdr msg;
  217. int ret;
  218. if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
  219. return;
  220. conn = call->conn;
  221. msg.msg_name = &call->peer->srx.transport;
  222. msg.msg_namelen = call->peer->srx.transport_len;
  223. msg.msg_control = NULL;
  224. msg.msg_controllen = 0;
  225. msg.msg_flags = MSG_SPLICE_PAGES;
  226. trace_rxrpc_tx_ack(call->debug_id, serial,
  227. ntohl(ack->firstPacket),
  228. ntohl(ack->serial), ack->reason, ack->nAcks,
  229. ntohl(trailer->rwind), why);
  230. rxrpc_inc_stat(call->rxnet, stat_tx_ack_send);
  231. iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, len);
  232. rxrpc_local_dont_fragment(conn->local, why == rxrpc_propose_ack_ping_for_mtu_probe);
  233. ret = do_udp_sendmsg(conn->local->socket, &msg, len);
  234. rxrpc_peer_mark_tx(call->peer);
  235. if (ret < 0) {
  236. trace_rxrpc_tx_fail(call->debug_id, serial, ret,
  237. rxrpc_tx_point_call_ack);
  238. if (why == rxrpc_propose_ack_ping_for_mtu_probe &&
  239. ret == -EMSGSIZE)
  240. rxrpc_input_probe_for_pmtud(conn, serial, true);
  241. } else {
  242. trace_rxrpc_tx_packet(call->debug_id, whdr,
  243. rxrpc_tx_point_call_ack);
  244. if (why == rxrpc_propose_ack_ping_for_mtu_probe) {
  245. call->peer->pmtud_pending = false;
  246. call->peer->pmtud_probing = true;
  247. call->conn->pmtud_probe = serial;
  248. call->conn->pmtud_call = call->debug_id;
  249. trace_rxrpc_pmtud_tx(call);
  250. }
  251. }
  252. rxrpc_tx_backoff(call, ret);
  253. }
  254. /*
  255. * Queue an ACK for immediate transmission.
  256. */
  257. void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
  258. rxrpc_serial_t serial_to_ack, enum rxrpc_propose_ack_trace why)
  259. {
  260. struct kvec *kv = call->local->kvec;
  261. rxrpc_serial_t ack_serial;
  262. size_t len;
  263. int nr_kv;
  264. if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
  265. return;
  266. rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);
  267. nr_kv = rxrpc_alloc_ack(call, call->ackr_wtop - call->ackr_window);
  268. if (nr_kv < 0) {
  269. kleave(" = -ENOMEM");
  270. return;
  271. }
  272. nr_kv = rxrpc_fill_out_ack(call, nr_kv, ack_reason, serial_to_ack, &ack_serial);
  273. len = kv[0].iov_len;
  274. len += kv[1].iov_len;
  275. len += kv[2].iov_len;
  276. /* Extend a path MTU probe ACK. */
  277. if (why == rxrpc_propose_ack_ping_for_mtu_probe) {
  278. size_t probe_mtu = call->peer->pmtud_trial + sizeof(struct rxrpc_wire_header);
  279. if (len > probe_mtu)
  280. goto skip;
  281. while (len < probe_mtu) {
  282. size_t part = umin(probe_mtu - len, PAGE_SIZE);
  283. kv[nr_kv].iov_base = page_address(ZERO_PAGE(0));
  284. kv[nr_kv].iov_len = part;
  285. len += part;
  286. nr_kv++;
  287. }
  288. }
  289. call->ackr_nr_unacked = 0;
  290. atomic_set(&call->ackr_nr_consumed, 0);
  291. clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
  292. trace_rxrpc_send_ack(call, why, ack_reason, ack_serial);
  293. rxrpc_send_ack_packet(call, nr_kv, len, ack_serial, why);
  294. skip:
  295. rxrpc_free_ack(call);
  296. }
  297. /*
  298. * Send an ACK probe for path MTU discovery.
  299. */
  300. void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call)
  301. {
  302. rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
  303. rxrpc_propose_ack_ping_for_mtu_probe);
  304. }
  305. /*
  306. * Send an ABORT call packet.
  307. */
  308. int rxrpc_send_abort_packet(struct rxrpc_call *call)
  309. {
  310. struct rxrpc_connection *conn;
  311. struct rxrpc_abort_buffer pkt;
  312. struct msghdr msg;
  313. struct kvec iov[1];
  314. rxrpc_serial_t serial;
  315. int ret;
  316. /* Don't bother sending aborts for a client call once the server has
  317. * hard-ACK'd all of its request data. After that point, we're not
  318. * going to stop the operation proceeding, and whilst we might limit
  319. * the reply, it's not worth it if we can send a new call on the same
  320. * channel instead, thereby closing off this call.
  321. */
  322. if (rxrpc_is_client_call(call) &&
  323. test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags))
  324. return 0;
  325. if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
  326. return -ECONNRESET;
  327. conn = call->conn;
  328. msg.msg_name = &call->peer->srx.transport;
  329. msg.msg_namelen = call->peer->srx.transport_len;
  330. msg.msg_control = NULL;
  331. msg.msg_controllen = 0;
  332. msg.msg_flags = 0;
  333. pkt.whdr.epoch = htonl(conn->proto.epoch);
  334. pkt.whdr.cid = htonl(call->cid);
  335. pkt.whdr.callNumber = htonl(call->call_id);
  336. pkt.whdr.seq = 0;
  337. pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT;
  338. pkt.whdr.flags = conn->out_clientflag;
  339. pkt.whdr.userStatus = 0;
  340. pkt.whdr.securityIndex = call->security_ix;
  341. pkt.whdr._rsvd = 0;
  342. pkt.whdr.serviceId = htons(call->dest_srx.srx_service);
  343. pkt.abort_code = htonl(call->abort_code);
  344. iov[0].iov_base = &pkt;
  345. iov[0].iov_len = sizeof(pkt);
  346. serial = rxrpc_get_next_serial(conn);
  347. pkt.whdr.serial = htonl(serial);
  348. iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
  349. ret = do_udp_sendmsg(conn->local->socket, &msg, sizeof(pkt));
  350. rxrpc_peer_mark_tx(conn->peer);
  351. if (ret < 0)
  352. trace_rxrpc_tx_fail(call->debug_id, serial, ret,
  353. rxrpc_tx_point_call_abort);
  354. else
  355. trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
  356. rxrpc_tx_point_call_abort);
  357. rxrpc_tx_backoff(call, ret);
  358. return ret;
  359. }
  360. /*
  361. * Prepare a (sub)packet for transmission.
  362. */
  363. static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call,
  364. struct rxrpc_send_data_req *req,
  365. struct rxrpc_txbuf *txb,
  366. struct rxrpc_wire_header *whdr,
  367. rxrpc_serial_t serial, int subpkt)
  368. {
  369. struct rxrpc_jumbo_header *jumbo = txb->data - sizeof(*jumbo);
  370. enum rxrpc_req_ack_trace why;
  371. struct rxrpc_connection *conn = call->conn;
  372. struct kvec *kv = &call->local->kvec[1 + subpkt];
  373. size_t len = txb->pkt_len;
  374. bool last;
  375. u8 flags;
  376. _enter("%x,%zd", txb->seq, len);
  377. txb->serial = serial;
  378. if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
  379. txb->seq == 1)
  380. whdr->userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
  381. txb->flags &= ~RXRPC_REQUEST_ACK;
  382. flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS;
  383. last = txb->flags & RXRPC_LAST_PACKET;
  384. if (subpkt < req->n - 1) {
  385. len = RXRPC_JUMBO_DATALEN;
  386. goto dont_set_request_ack;
  387. }
  388. /* If our RTT cache needs working on, request an ACK. Also request
  389. * ACKs if a DATA packet appears to have been lost.
  390. *
  391. * However, we mustn't request an ACK on the last reply packet of a
  392. * service call, lest OpenAFS incorrectly send us an ACK with some
  393. * soft-ACKs in it and then never follow up with a proper hard ACK.
  394. */
  395. if (last && rxrpc_sending_to_client(txb))
  396. why = rxrpc_reqack_no_srv_last;
  397. else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
  398. why = rxrpc_reqack_ack_lost;
  399. else if (txb->flags & RXRPC_TXBUF_RESENT)
  400. why = rxrpc_reqack_retrans;
  401. else if (call->cong_ca_state == RXRPC_CA_SLOW_START && call->cong_cwnd <= RXRPC_MIN_CWND)
  402. why = rxrpc_reqack_slow_start;
  403. else if (call->tx_winsize <= 2)
  404. why = rxrpc_reqack_small_txwin;
  405. else if (call->rtt_count < 3)
  406. why = rxrpc_reqack_more_rtt;
  407. else if (ktime_before(ktime_add_ms(call->rtt_last_req, 1000), ktime_get_real()))
  408. why = rxrpc_reqack_old_rtt;
  409. else if (!last && !after(READ_ONCE(call->send_top), txb->seq))
  410. why = rxrpc_reqack_app_stall;
  411. else if (call->tx_winsize <= (2 * req->n) || call->cong_cwnd <= (2 * req->n))
  412. why = rxrpc_reqack_jumbo_win;
  413. else
  414. goto dont_set_request_ack;
  415. rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]);
  416. trace_rxrpc_req_ack(call->debug_id, txb->seq, why);
  417. if (why != rxrpc_reqack_no_srv_last) {
  418. flags |= RXRPC_REQUEST_ACK;
  419. trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, -1, serial);
  420. call->rtt_last_req = req->now;
  421. }
  422. dont_set_request_ack:
  423. /* There's a jumbo header prepended to the data if we need it. */
  424. if (subpkt < req->n - 1)
  425. flags |= RXRPC_JUMBO_PACKET;
  426. else
  427. flags &= ~RXRPC_JUMBO_PACKET;
  428. if (subpkt == 0) {
  429. whdr->flags = flags;
  430. whdr->cksum = txb->cksum;
  431. kv->iov_base = txb->data;
  432. } else {
  433. jumbo->flags = flags;
  434. jumbo->pad = 0;
  435. jumbo->cksum = txb->cksum;
  436. kv->iov_base = jumbo;
  437. len += sizeof(*jumbo);
  438. }
  439. trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, req->trace);
  440. kv->iov_len = len;
  441. return len;
  442. }
  443. /*
  444. * Prepare a transmission queue object for initial transmission. Returns the
  445. * number of microseconds since the transmission queue base timestamp.
  446. */
  447. static unsigned int rxrpc_prepare_txqueue(struct rxrpc_txqueue *tq,
  448. struct rxrpc_send_data_req *req)
  449. {
  450. if (!tq)
  451. return 0;
  452. if (tq->xmit_ts_base == KTIME_MIN) {
  453. tq->xmit_ts_base = req->now;
  454. return 0;
  455. }
  456. return ktime_to_us(ktime_sub(req->now, tq->xmit_ts_base));
  457. }
  458. /*
  459. * Prepare a (jumbo) packet for transmission.
  460. */
  461. static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call,
  462. struct rxrpc_send_data_req *req,
  463. struct rxrpc_wire_header *whdr)
  464. {
  465. struct rxrpc_txqueue *tq = req->tq;
  466. rxrpc_serial_t serial;
  467. unsigned int xmit_ts;
  468. rxrpc_seq_t seq = req->seq;
  469. size_t len = 0;
  470. bool start_tlp = false;
  471. trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit);
  472. /* Each transmission of a Tx packet needs a new serial number */
  473. serial = rxrpc_get_next_serials(call->conn, req->n);
  474. whdr->epoch = htonl(call->conn->proto.epoch);
  475. whdr->cid = htonl(call->cid);
  476. whdr->callNumber = htonl(call->call_id);
  477. whdr->seq = htonl(seq);
  478. whdr->serial = htonl(serial);
  479. whdr->type = RXRPC_PACKET_TYPE_DATA;
  480. whdr->flags = 0;
  481. whdr->userStatus = 0;
  482. whdr->securityIndex = call->security_ix;
  483. whdr->_rsvd = 0;
  484. whdr->serviceId = htons(call->conn->service_id);
  485. call->tx_last_serial = serial + req->n - 1;
  486. call->tx_last_sent = req->now;
  487. xmit_ts = rxrpc_prepare_txqueue(tq, req);
  488. prefetch(tq->next);
  489. for (int i = 0;;) {
  490. int ix = seq & RXRPC_TXQ_MASK;
  491. struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK];
  492. _debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq);
  493. /* Record (re-)transmission for RACK [RFC8985 6.1]. */
  494. if (__test_and_clear_bit(ix, &tq->segment_lost))
  495. call->tx_nr_lost--;
  496. if (req->retrans) {
  497. __set_bit(ix, &tq->ever_retransmitted);
  498. __set_bit(ix, &tq->segment_retransmitted);
  499. call->tx_nr_resent++;
  500. } else {
  501. call->tx_nr_sent++;
  502. start_tlp = true;
  503. }
  504. tq->segment_xmit_ts[ix] = xmit_ts;
  505. tq->segment_serial[ix] = serial;
  506. if (i + 1 == req->n)
  507. /* Only sample the last subpacket in a jumbo. */
  508. __set_bit(ix, &tq->rtt_samples);
  509. len += rxrpc_prepare_data_subpacket(call, req, txb, whdr, serial, i);
  510. serial++;
  511. seq++;
  512. i++;
  513. if (i >= req->n)
  514. break;
  515. if (!(seq & RXRPC_TXQ_MASK)) {
  516. tq = tq->next;
  517. trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit_advance);
  518. xmit_ts = rxrpc_prepare_txqueue(tq, req);
  519. }
  520. }
  521. /* Set timeouts */
  522. if (req->tlp_probe) {
  523. /* Sending TLP loss probe [RFC8985 7.3]. */
  524. call->tlp_serial = serial - 1;
  525. call->tlp_seq = seq - 1;
  526. } else if (start_tlp) {
  527. /* Schedule TLP loss probe [RFC8985 7.2]. */
  528. ktime_t pto;
  529. if (!test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
  530. /* The first packet may take longer to elicit a response. */
  531. pto = NSEC_PER_SEC;
  532. else
  533. pto = rxrpc_tlp_calc_pto(call, req->now);
  534. call->rack_timer_mode = RXRPC_CALL_RACKTIMER_TLP_PTO;
  535. call->rack_timo_at = ktime_add(req->now, pto);
  536. trace_rxrpc_rack_timer(call, pto, false);
  537. trace_rxrpc_timer_set(call, pto, rxrpc_timer_trace_rack_tlp_pto);
  538. }
  539. if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) {
  540. ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo));
  541. call->expect_rx_by = ktime_add(req->now, delay);
  542. trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx);
  543. }
  544. rxrpc_set_keepalive(call, req->now);
  545. page_frag_free(whdr);
  546. return len;
  547. }
  548. /*
  549. * Send one or more packets through the transport endpoint
  550. */
  551. void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req)
  552. {
  553. struct rxrpc_wire_header *whdr;
  554. struct rxrpc_connection *conn = call->conn;
  555. enum rxrpc_tx_point frag;
  556. struct rxrpc_txqueue *tq = req->tq;
  557. struct rxrpc_txbuf *txb;
  558. struct msghdr msg;
  559. rxrpc_seq_t seq = req->seq;
  560. size_t len = sizeof(*whdr);
  561. bool new_call = test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags);
  562. int ret, stat_ix;
  563. _enter("%x,%x-%x", tq->qbase, seq, seq + req->n - 1);
  564. whdr = page_frag_alloc(&call->local->tx_alloc, sizeof(*whdr), GFP_NOFS);
  565. if (!whdr)
  566. return; /* Drop the packet if no memory. */
  567. call->local->kvec[0].iov_base = whdr;
  568. call->local->kvec[0].iov_len = sizeof(*whdr);
  569. stat_ix = umin(req->n, ARRAY_SIZE(call->rxnet->stat_tx_jumbo)) - 1;
  570. atomic_inc(&call->rxnet->stat_tx_jumbo[stat_ix]);
  571. len += rxrpc_prepare_data_packet(call, req, whdr);
  572. txb = tq->bufs[seq & RXRPC_TXQ_MASK];
  573. iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, 1 + req->n, len);
  574. msg.msg_name = &call->peer->srx.transport;
  575. msg.msg_namelen = call->peer->srx.transport_len;
  576. msg.msg_control = NULL;
  577. msg.msg_controllen = 0;
  578. msg.msg_flags = MSG_SPLICE_PAGES;
  579. /* Send the packet with the don't fragment bit set unless we think it's
  580. * too big or if this is a retransmission.
  581. */
  582. if (seq == call->tx_transmitted + 1 &&
  583. len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) {
  584. rxrpc_local_dont_fragment(conn->local, false);
  585. frag = rxrpc_tx_point_call_data_frag;
  586. } else {
  587. rxrpc_local_dont_fragment(conn->local, true);
  588. frag = rxrpc_tx_point_call_data_nofrag;
  589. }
  590. /* Track what we've attempted to transmit at least once so that the
  591. * retransmission algorithm doesn't try to resend what we haven't sent
  592. * yet.
  593. */
  594. if (seq == call->tx_transmitted + 1)
  595. call->tx_transmitted = seq + req->n - 1;
  596. if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
  597. static int lose;
  598. if ((lose++ & 7) == 7) {
  599. ret = 0;
  600. trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags,
  601. rxrpc_txdata_inject_loss);
  602. rxrpc_peer_mark_tx(conn->peer);
  603. goto done;
  604. }
  605. }
  606. /* send the packet by UDP
  607. * - returns -EMSGSIZE if UDP would have to fragment the packet
  608. * to go out of the interface
  609. * - in which case, we'll have processed the ICMP error
  610. * message and update the peer record
  611. */
  612. rxrpc_inc_stat(call->rxnet, stat_tx_data_send);
  613. ret = do_udp_sendmsg(conn->local->socket, &msg, len);
  614. rxrpc_peer_mark_tx(conn->peer);
  615. if (ret == -EMSGSIZE) {
  616. rxrpc_inc_stat(call->rxnet, stat_tx_data_send_msgsize);
  617. trace_rxrpc_tx_packet(call->debug_id, whdr, frag);
  618. ret = 0;
  619. } else if (ret < 0) {
  620. rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
  621. trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag);
  622. } else {
  623. trace_rxrpc_tx_packet(call->debug_id, whdr, frag);
  624. }
  625. rxrpc_tx_backoff(call, ret);
  626. if (ret < 0) {
  627. /* Cancel the call if the initial transmission fails or if we
  628. * hit due to network routing issues that aren't going away
  629. * anytime soon. The layer above can arrange the
  630. * retransmission.
  631. */
  632. if (new_call ||
  633. ret == -ENETUNREACH ||
  634. ret == -EHOSTUNREACH ||
  635. ret == -ECONNREFUSED)
  636. rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
  637. RX_USER_ABORT, ret);
  638. }
  639. done:
  640. _leave(" = %d [%u]", ret, call->peer->max_data);
  641. }
  642. /*
  643. * Transmit a connection-level abort.
  644. */
  645. void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
  646. {
  647. struct rxrpc_wire_header whdr;
  648. struct msghdr msg;
  649. struct kvec iov[2];
  650. __be32 word;
  651. size_t len;
  652. u32 serial;
  653. int ret;
  654. msg.msg_name = &conn->peer->srx.transport;
  655. msg.msg_namelen = conn->peer->srx.transport_len;
  656. msg.msg_control = NULL;
  657. msg.msg_controllen = 0;
  658. msg.msg_flags = 0;
  659. whdr.epoch = htonl(conn->proto.epoch);
  660. whdr.cid = htonl(conn->proto.cid);
  661. whdr.callNumber = 0;
  662. whdr.seq = 0;
  663. whdr.type = RXRPC_PACKET_TYPE_ABORT;
  664. whdr.flags = conn->out_clientflag;
  665. whdr.userStatus = 0;
  666. whdr.securityIndex = conn->security_ix;
  667. whdr._rsvd = 0;
  668. whdr.serviceId = htons(conn->service_id);
  669. word = htonl(conn->abort_code);
  670. iov[0].iov_base = &whdr;
  671. iov[0].iov_len = sizeof(whdr);
  672. iov[1].iov_base = &word;
  673. iov[1].iov_len = sizeof(word);
  674. len = iov[0].iov_len + iov[1].iov_len;
  675. serial = rxrpc_get_next_serial(conn);
  676. whdr.serial = htonl(serial);
  677. iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
  678. ret = do_udp_sendmsg(conn->local->socket, &msg, len);
  679. if (ret < 0) {
  680. trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
  681. rxrpc_tx_point_conn_abort);
  682. _debug("sendmsg failed: %d", ret);
  683. return;
  684. }
  685. trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
  686. rxrpc_peer_mark_tx(conn->peer);
  687. }
  688. /*
  689. * Reject a packet through the local endpoint.
  690. */
  691. void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
  692. {
  693. struct rxrpc_wire_header whdr;
  694. struct sockaddr_rxrpc srx;
  695. struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
  696. struct msghdr msg;
  697. struct kvec iov[2];
  698. size_t size;
  699. __be32 code;
  700. int ret, ioc;
  701. if (sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
  702. return; /* Never abort an abort. */
  703. rxrpc_see_skb(skb, rxrpc_skb_see_reject);
  704. iov[0].iov_base = &whdr;
  705. iov[0].iov_len = sizeof(whdr);
  706. iov[1].iov_base = &code;
  707. iov[1].iov_len = sizeof(code);
  708. msg.msg_name = &srx.transport;
  709. msg.msg_control = NULL;
  710. msg.msg_controllen = 0;
  711. msg.msg_flags = 0;
  712. whdr = (struct rxrpc_wire_header) {
  713. .epoch = htonl(sp->hdr.epoch),
  714. .cid = htonl(sp->hdr.cid),
  715. .callNumber = htonl(sp->hdr.callNumber),
  716. .serviceId = htons(sp->hdr.serviceId),
  717. .flags = ~sp->hdr.flags & RXRPC_CLIENT_INITIATED,
  718. };
  719. switch (skb->mark) {
  720. case RXRPC_SKB_MARK_REJECT_BUSY:
  721. whdr.type = RXRPC_PACKET_TYPE_BUSY;
  722. size = sizeof(whdr);
  723. ioc = 1;
  724. break;
  725. case RXRPC_SKB_MARK_REJECT_CONN_ABORT:
  726. whdr.callNumber = 0;
  727. fallthrough;
  728. case RXRPC_SKB_MARK_REJECT_ABORT:
  729. whdr.type = RXRPC_PACKET_TYPE_ABORT;
  730. code = htonl(skb->priority);
  731. size = sizeof(whdr) + sizeof(code);
  732. ioc = 2;
  733. break;
  734. default:
  735. return;
  736. }
  737. if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
  738. msg.msg_namelen = srx.transport_len;
  739. iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size);
  740. ret = do_udp_sendmsg(local->socket, &msg, size);
  741. if (ret < 0)
  742. trace_rxrpc_tx_fail(local->debug_id, 0, ret,
  743. rxrpc_tx_point_reject);
  744. else
  745. trace_rxrpc_tx_packet(local->debug_id, &whdr,
  746. rxrpc_tx_point_reject);
  747. }
  748. }
  749. /*
  750. * Send a VERSION reply to a peer as a keepalive.
  751. */
  752. void rxrpc_send_keepalive(struct rxrpc_peer *peer)
  753. {
  754. struct rxrpc_wire_header whdr;
  755. struct msghdr msg;
  756. struct kvec iov[2];
  757. size_t len;
  758. int ret;
  759. _enter("");
  760. msg.msg_name = &peer->srx.transport;
  761. msg.msg_namelen = peer->srx.transport_len;
  762. msg.msg_control = NULL;
  763. msg.msg_controllen = 0;
  764. msg.msg_flags = 0;
  765. whdr.epoch = htonl(peer->local->rxnet->epoch);
  766. whdr.cid = 0;
  767. whdr.callNumber = 0;
  768. whdr.seq = 0;
  769. whdr.serial = 0;
  770. whdr.type = RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */
  771. whdr.flags = RXRPC_LAST_PACKET;
  772. whdr.userStatus = 0;
  773. whdr.securityIndex = 0;
  774. whdr._rsvd = 0;
  775. whdr.serviceId = 0;
  776. iov[0].iov_base = &whdr;
  777. iov[0].iov_len = sizeof(whdr);
  778. iov[1].iov_base = (char *)rxrpc_keepalive_string;
  779. iov[1].iov_len = sizeof(rxrpc_keepalive_string);
  780. len = iov[0].iov_len + iov[1].iov_len;
  781. iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
  782. ret = do_udp_sendmsg(peer->local->socket, &msg, len);
  783. if (ret < 0)
  784. trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
  785. rxrpc_tx_point_version_keepalive);
  786. else
  787. trace_rxrpc_tx_packet(peer->debug_id, &whdr,
  788. rxrpc_tx_point_version_keepalive);
  789. rxrpc_peer_mark_tx(peer);
  790. _leave("");
  791. }
  792. /*
  793. * Send a RESPONSE message.
  794. */
  795. void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response)
  796. {
  797. struct rxrpc_skb_priv *sp = rxrpc_skb(response);
  798. struct scatterlist sg[16];
  799. struct bio_vec *bvec = conn->local->bvec;
  800. struct msghdr msg;
  801. size_t len = sp->resp.len;
  802. __be32 wserial;
  803. u32 serial = 0;
  804. int ret, nr_sg;
  805. _enter("C=%x,%x", conn->debug_id, sp->resp.challenge_serial);
  806. sg_init_table(sg, ARRAY_SIZE(sg));
  807. ret = skb_to_sgvec(response, sg, 0, len);
  808. if (ret < 0)
  809. goto fail;
  810. nr_sg = ret;
  811. ret = -EIO;
  812. if (WARN_ON_ONCE(nr_sg > ARRAY_SIZE(conn->local->bvec)))
  813. goto fail;
  814. for (int i = 0; i < nr_sg; i++)
  815. bvec_set_page(&bvec[i], sg_page(&sg[i]), sg[i].length, sg[i].offset);
  816. iov_iter_bvec(&msg.msg_iter, WRITE, bvec, nr_sg, len);
  817. msg.msg_name = &conn->peer->srx.transport;
  818. msg.msg_namelen = conn->peer->srx.transport_len;
  819. msg.msg_control = NULL;
  820. msg.msg_controllen = 0;
  821. msg.msg_flags = MSG_SPLICE_PAGES;
  822. serial = rxrpc_get_next_serials(conn, 1);
  823. wserial = htonl(serial);
  824. trace_rxrpc_tx_response(conn, serial, sp);
  825. ret = skb_store_bits(response, offsetof(struct rxrpc_wire_header, serial),
  826. &wserial, sizeof(wserial));
  827. if (ret < 0)
  828. goto fail;
  829. rxrpc_local_dont_fragment(conn->local, false);
  830. ret = do_udp_sendmsg(conn->local->socket, &msg, len);
  831. if (ret < 0)
  832. goto fail;
  833. rxrpc_peer_mark_tx(conn->peer);
  834. return;
  835. fail:
  836. trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
  837. rxrpc_tx_point_response);
  838. kleave(" = %d", ret);
  839. }