peer_event.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /* Peer event handling, typically ICMP messages.
  3. *
  4. * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  5. * Written by David Howells (dhowells@redhat.com)
  6. */
  7. #include <linux/module.h>
  8. #include <linux/net.h>
  9. #include <linux/skbuff.h>
  10. #include <linux/errqueue.h>
  11. #include <linux/udp.h>
  12. #include <linux/in.h>
  13. #include <linux/in6.h>
  14. #include <linux/icmp.h>
  15. #include <net/sock.h>
  16. #include <net/af_rxrpc.h>
  17. #include <net/ip.h>
  18. #include "ar-internal.h"
  /* The error-handling helpers are called before their definitions appear in
   * this file, so declare them up front.
   */
  static void rxrpc_store_error(struct rxrpc_peer *peer, struct sk_buff *skb);
  static void rxrpc_distribute_error(struct rxrpc_peer *peer, struct sk_buff *skb,
  		enum rxrpc_call_completion compl, int err);
  22. /*
  23. * Find the peer associated with a local error.
  24. */
  25. static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
  26. const struct sk_buff *skb,
  27. struct sockaddr_rxrpc *srx)
  28. {
  29. struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
  30. _enter("");
  31. memset(srx, 0, sizeof(*srx));
  32. srx->transport_type = local->srx.transport_type;
  33. srx->transport_len = local->srx.transport_len;
  34. srx->transport.family = local->srx.transport.family;
  35. /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
  36. * versa?
  37. */
  38. switch (srx->transport.family) {
  39. case AF_INET:
  40. srx->transport_len = sizeof(srx->transport.sin);
  41. srx->transport.family = AF_INET;
  42. srx->transport.sin.sin_port = serr->port;
  43. switch (serr->ee.ee_origin) {
  44. case SO_EE_ORIGIN_ICMP:
  45. memcpy(&srx->transport.sin.sin_addr,
  46. skb_network_header(skb) + serr->addr_offset,
  47. sizeof(struct in_addr));
  48. break;
  49. case SO_EE_ORIGIN_ICMP6:
  50. memcpy(&srx->transport.sin.sin_addr,
  51. skb_network_header(skb) + serr->addr_offset + 12,
  52. sizeof(struct in_addr));
  53. break;
  54. default:
  55. memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
  56. sizeof(struct in_addr));
  57. break;
  58. }
  59. break;
  60. #ifdef CONFIG_AF_RXRPC_IPV6
  61. case AF_INET6:
  62. switch (serr->ee.ee_origin) {
  63. case SO_EE_ORIGIN_ICMP6:
  64. srx->transport.sin6.sin6_port = serr->port;
  65. memcpy(&srx->transport.sin6.sin6_addr,
  66. skb_network_header(skb) + serr->addr_offset,
  67. sizeof(struct in6_addr));
  68. break;
  69. case SO_EE_ORIGIN_ICMP:
  70. srx->transport_len = sizeof(srx->transport.sin);
  71. srx->transport.family = AF_INET;
  72. srx->transport.sin.sin_port = serr->port;
  73. memcpy(&srx->transport.sin.sin_addr,
  74. skb_network_header(skb) + serr->addr_offset,
  75. sizeof(struct in_addr));
  76. break;
  77. default:
  78. memcpy(&srx->transport.sin6.sin6_addr,
  79. &ipv6_hdr(skb)->saddr,
  80. sizeof(struct in6_addr));
  81. break;
  82. }
  83. break;
  84. #endif
  85. default:
  86. BUG();
  87. }
  88. return rxrpc_lookup_peer_rcu(local, srx);
  89. }
  90. /*
  91. * Handle an MTU/fragmentation problem.
  92. */
  93. static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
  94. {
  95. unsigned int max_data;
  96. /* wind down the local interface MTU */
  97. if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu)
  98. peer->if_mtu = mtu;
  99. if (mtu == 0) {
  100. /* they didn't give us a size, estimate one */
  101. mtu = peer->if_mtu;
  102. if (mtu > 1500) {
  103. mtu >>= 1;
  104. if (mtu < 1500)
  105. mtu = 1500;
  106. } else {
  107. mtu -= 100;
  108. if (mtu < peer->hdrsize)
  109. mtu = peer->hdrsize + 4;
  110. }
  111. }
  112. max_data = max_t(int, mtu - peer->hdrsize, 500);
  113. if (max_data < peer->max_data) {
  114. if (peer->pmtud_good > max_data)
  115. peer->pmtud_good = max_data;
  116. if (peer->pmtud_bad > max_data + 1)
  117. peer->pmtud_bad = max_data + 1;
  118. trace_rxrpc_pmtud_reduce(peer, 0, max_data, rxrpc_pmtud_reduce_icmp);
  119. peer->max_data = max_data;
  120. }
  121. }
  122. /*
  123. * Handle an error received on the local endpoint.
  124. */
  125. void rxrpc_input_error(struct rxrpc_local *local, struct sk_buff *skb)
  126. {
  127. struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
  128. struct sockaddr_rxrpc srx;
  129. struct rxrpc_peer *peer = NULL;
  130. _enter("L=%x", local->debug_id);
  131. if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
  132. _leave("UDP empty message");
  133. return;
  134. }
  135. rcu_read_lock();
  136. peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx);
  137. if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_input_error))
  138. peer = NULL;
  139. rcu_read_unlock();
  140. if (!peer)
  141. return;
  142. trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
  143. if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
  144. serr->ee.ee_type == ICMP_DEST_UNREACH &&
  145. serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
  146. rxrpc_adjust_mtu(peer, serr->ee.ee_info);
  147. goto out;
  148. }
  149. if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6 &&
  150. serr->ee.ee_type == ICMPV6_PKT_TOOBIG &&
  151. serr->ee.ee_code == 0)) {
  152. rxrpc_adjust_mtu(peer, serr->ee.ee_info);
  153. goto out;
  154. }
  155. rxrpc_store_error(peer, skb);
  156. out:
  157. rxrpc_put_peer(peer, rxrpc_peer_put_input_error);
  158. }
  159. /*
  160. * Map an error report to error codes on the peer record.
  161. */
  162. static void rxrpc_store_error(struct rxrpc_peer *peer, struct sk_buff *skb)
  163. {
  164. enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR;
  165. struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
  166. struct sock_extended_err *ee = &serr->ee;
  167. int err = ee->ee_errno;
  168. _enter("");
  169. switch (ee->ee_origin) {
  170. case SO_EE_ORIGIN_NONE:
  171. case SO_EE_ORIGIN_LOCAL:
  172. compl = RXRPC_CALL_LOCAL_ERROR;
  173. break;
  174. case SO_EE_ORIGIN_ICMP6:
  175. if (err == EACCES)
  176. err = EHOSTUNREACH;
  177. fallthrough;
  178. case SO_EE_ORIGIN_ICMP:
  179. default:
  180. break;
  181. }
  182. rxrpc_distribute_error(peer, skb, compl, err);
  183. }
  184. /*
  185. * Distribute an error that occurred on a peer.
  186. */
  187. static void rxrpc_distribute_error(struct rxrpc_peer *peer, struct sk_buff *skb,
  188. enum rxrpc_call_completion compl, int err)
  189. {
  190. struct rxrpc_call *call;
  191. HLIST_HEAD(error_targets);
  192. spin_lock_irq(&peer->lock);
  193. hlist_move_list(&peer->error_targets, &error_targets);
  194. while (!hlist_empty(&error_targets)) {
  195. call = hlist_entry(error_targets.first,
  196. struct rxrpc_call, error_link);
  197. hlist_del_init(&call->error_link);
  198. spin_unlock_irq(&peer->lock);
  199. rxrpc_see_call(call, rxrpc_call_see_distribute_error);
  200. rxrpc_set_call_completion(call, compl, 0, -err);
  201. rxrpc_input_call_event(call);
  202. spin_lock_irq(&peer->lock);
  203. }
  204. spin_unlock_irq(&peer->lock);
  205. }
  206. /*
  207. * Reconstruct the last transmission time. The difference calculated should be
  208. * valid provided no more than ~68 years elapsed since the last transmission.
  209. */
  210. static time64_t rxrpc_peer_get_tx_mark(const struct rxrpc_peer *peer, time64_t base)
  211. {
  212. s32 last_tx_at = READ_ONCE(peer->last_tx_at);
  213. s32 base_lsw = base;
  214. s32 diff = last_tx_at - base_lsw;
  215. diff = clamp(diff, -RXRPC_KEEPALIVE_TIME, RXRPC_KEEPALIVE_TIME);
  216. return diff + base;
  217. }
  218. /*
  219. * Perform keep-alive pings.
  220. */
  221. static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
  222. struct list_head *collector,
  223. time64_t base,
  224. u8 cursor)
  225. {
  226. struct rxrpc_peer *peer;
  227. const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
  228. time64_t keepalive_at;
  229. bool use;
  230. int slot;
  231. spin_lock_bh(&rxnet->peer_hash_lock);
  232. while (!list_empty(collector)) {
  233. peer = list_entry(collector->next,
  234. struct rxrpc_peer, keepalive_link);
  235. list_del_init(&peer->keepalive_link);
  236. if (!rxrpc_get_peer_maybe(peer, rxrpc_peer_get_keepalive))
  237. continue;
  238. use = __rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive);
  239. spin_unlock_bh(&rxnet->peer_hash_lock);
  240. if (use) {
  241. keepalive_at = rxrpc_peer_get_tx_mark(peer, base) + RXRPC_KEEPALIVE_TIME;
  242. slot = keepalive_at - base;
  243. _debug("%02x peer %u t=%d {%pISp}",
  244. cursor, peer->debug_id, slot, &peer->srx.transport);
  245. if (keepalive_at <= base ||
  246. keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
  247. rxrpc_send_keepalive(peer);
  248. slot = RXRPC_KEEPALIVE_TIME;
  249. }
  250. /* A transmission to this peer occurred since last we
  251. * examined it so put it into the appropriate future
  252. * bucket.
  253. */
  254. slot += cursor;
  255. slot &= mask;
  256. spin_lock_bh(&rxnet->peer_hash_lock);
  257. list_add_tail(&peer->keepalive_link,
  258. &rxnet->peer_keepalive[slot & mask]);
  259. spin_unlock_bh(&rxnet->peer_hash_lock);
  260. rxrpc_unuse_local(peer->local, rxrpc_local_unuse_peer_keepalive);
  261. }
  262. rxrpc_put_peer(peer, rxrpc_peer_put_keepalive);
  263. spin_lock_bh(&rxnet->peer_hash_lock);
  264. }
  265. spin_unlock_bh(&rxnet->peer_hash_lock);
  266. }
  267. /*
  268. * Perform keep-alive pings with VERSION packets to keep any NAT alive.
  269. */
  270. void rxrpc_peer_keepalive_worker(struct work_struct *work)
  271. {
  272. struct rxrpc_net *rxnet =
  273. container_of(work, struct rxrpc_net, peer_keepalive_work);
  274. const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
  275. time64_t base, now, delay;
  276. u8 cursor, stop;
  277. LIST_HEAD(collector);
  278. now = ktime_get_seconds();
  279. base = rxnet->peer_keepalive_base;
  280. cursor = rxnet->peer_keepalive_cursor;
  281. _enter("%lld,%u", base - now, cursor);
  282. if (!rxnet->live)
  283. return;
  284. /* Remove to a temporary list all the peers that are currently lodged
  285. * in expired buckets plus all new peers.
  286. *
  287. * Everything in the bucket at the cursor is processed this
  288. * second; the bucket at cursor + 1 goes at now + 1s and so
  289. * on...
  290. */
  291. spin_lock_bh(&rxnet->peer_hash_lock);
  292. list_splice_init(&rxnet->peer_keepalive_new, &collector);
  293. stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
  294. while (base <= now && (s8)(cursor - stop) < 0) {
  295. list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask],
  296. &collector);
  297. base++;
  298. cursor++;
  299. }
  300. base = now;
  301. spin_unlock_bh(&rxnet->peer_hash_lock);
  302. rxnet->peer_keepalive_base = base;
  303. rxnet->peer_keepalive_cursor = cursor;
  304. rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor);
  305. ASSERT(list_empty(&collector));
  306. /* Schedule the timer for the next occupied timeslot. */
  307. cursor = rxnet->peer_keepalive_cursor;
  308. stop = cursor + RXRPC_KEEPALIVE_TIME - 1;
  309. for (; (s8)(cursor - stop) < 0; cursor++) {
  310. if (!list_empty(&rxnet->peer_keepalive[cursor & mask]))
  311. break;
  312. base++;
  313. }
  314. now = ktime_get_seconds();
  315. delay = base - now;
  316. if (delay < 1)
  317. delay = 1;
  318. delay *= HZ;
  319. if (rxnet->live)
  320. timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);
  321. _leave("");
  322. }
  323. /*
  324. * Do path MTU probing.
  325. */
  326. void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t acked_serial,
  327. bool sendmsg_fail)
  328. {
  329. struct rxrpc_peer *peer = conn->peer;
  330. unsigned int max_data = peer->max_data;
  331. int good, trial, bad, jumbo;
  332. good = peer->pmtud_good;
  333. trial = peer->pmtud_trial;
  334. bad = peer->pmtud_bad;
  335. if (good >= bad - 1) {
  336. conn->pmtud_probe = 0;
  337. peer->pmtud_lost = false;
  338. return;
  339. }
  340. if (!peer->pmtud_probing)
  341. goto send_probe;
  342. if (sendmsg_fail || after(acked_serial, conn->pmtud_probe)) {
  343. /* Retry a lost probe. */
  344. if (!peer->pmtud_lost) {
  345. trace_rxrpc_pmtud_lost(conn, acked_serial);
  346. conn->pmtud_probe = 0;
  347. peer->pmtud_lost = true;
  348. goto send_probe;
  349. }
  350. /* The probed size didn't seem to get through. */
  351. bad = trial;
  352. peer->pmtud_bad = bad;
  353. if (bad <= max_data)
  354. max_data = bad - 1;
  355. } else {
  356. /* It did get through. */
  357. good = trial;
  358. peer->pmtud_good = good;
  359. if (good > max_data)
  360. max_data = good;
  361. }
  362. max_data = umin(max_data, peer->ackr_max_data);
  363. if (max_data != peer->max_data)
  364. peer->max_data = max_data;
  365. jumbo = max_data + sizeof(struct rxrpc_jumbo_header);
  366. jumbo /= RXRPC_JUMBO_SUBPKTLEN;
  367. peer->pmtud_jumbo = jumbo;
  368. trace_rxrpc_pmtud_rx(conn, acked_serial);
  369. conn->pmtud_probe = 0;
  370. peer->pmtud_lost = false;
  371. if (good < RXRPC_JUMBO(2) && bad > RXRPC_JUMBO(2))
  372. trial = RXRPC_JUMBO(2);
  373. else if (good < RXRPC_JUMBO(4) && bad > RXRPC_JUMBO(4))
  374. trial = RXRPC_JUMBO(4);
  375. else if (good < RXRPC_JUMBO(3) && bad > RXRPC_JUMBO(3))
  376. trial = RXRPC_JUMBO(3);
  377. else if (good < RXRPC_JUMBO(6) && bad > RXRPC_JUMBO(6))
  378. trial = RXRPC_JUMBO(6);
  379. else if (good < RXRPC_JUMBO(5) && bad > RXRPC_JUMBO(5))
  380. trial = RXRPC_JUMBO(5);
  381. else if (good < RXRPC_JUMBO(8) && bad > RXRPC_JUMBO(8))
  382. trial = RXRPC_JUMBO(8);
  383. else if (good < RXRPC_JUMBO(7) && bad > RXRPC_JUMBO(7))
  384. trial = RXRPC_JUMBO(7);
  385. else
  386. trial = (good + bad) / 2;
  387. peer->pmtud_trial = trial;
  388. if (good >= bad)
  389. return;
  390. send_probe:
  391. peer->pmtud_pending = true;
  392. }