espintcp.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <net/tcp.h>
  3. #include <net/strparser.h>
  4. #include <net/xfrm.h>
  5. #include <net/esp.h>
  6. #include <net/espintcp.h>
  7. #include <linux/skmsg.h>
  8. #include <net/inet_common.h>
  9. #include <trace/events/sock.h>
  10. #if IS_ENABLED(CONFIG_IPV6)
  11. #include <net/ipv6_stubs.h>
  12. #endif
  13. #include <net/hotdata.h>
  14. static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
  15. struct sock *sk)
  16. {
  17. if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf ||
  18. !sk_rmem_schedule(sk, skb, skb->truesize)) {
  19. XFRM_INC_STATS(sock_net(sk), LINUX_MIB_XFRMINERROR);
  20. kfree_skb(skb);
  21. return;
  22. }
  23. skb_set_owner_r(skb, sk);
  24. memset(skb->cb, 0, sizeof(skb->cb));
  25. skb_queue_tail(&ctx->ike_queue, skb);
  26. ctx->saved_data_ready(sk);
  27. }
  28. static void handle_esp(struct sk_buff *skb, struct sock *sk)
  29. {
  30. struct tcp_skb_cb *tcp_cb = (struct tcp_skb_cb *)skb->cb;
  31. skb_reset_transport_header(skb);
  32. /* restore IP CB, we need at least IP6CB->nhoff */
  33. memmove(skb->cb, &tcp_cb->header, sizeof(tcp_cb->header));
  34. rcu_read_lock();
  35. skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
  36. local_bh_disable();
  37. #if IS_ENABLED(CONFIG_IPV6)
  38. if (sk->sk_family == AF_INET6)
  39. ipv6_stub->xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
  40. else
  41. #endif
  42. xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
  43. local_bh_enable();
  44. rcu_read_unlock();
  45. }
  46. static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb)
  47. {
  48. struct espintcp_ctx *ctx = container_of(strp, struct espintcp_ctx,
  49. strp);
  50. struct strp_msg *rxm = strp_msg(skb);
  51. int len = rxm->full_len - 2;
  52. u32 nonesp_marker;
  53. int err;
  54. /* keepalive packet? */
  55. if (unlikely(len == 1)) {
  56. u8 data;
  57. err = skb_copy_bits(skb, rxm->offset + 2, &data, 1);
  58. if (err < 0) {
  59. XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
  60. kfree_skb(skb);
  61. return;
  62. }
  63. if (data == 0xff) {
  64. kfree_skb(skb);
  65. return;
  66. }
  67. }
  68. /* drop other short messages */
  69. if (unlikely(len <= sizeof(nonesp_marker))) {
  70. XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
  71. kfree_skb(skb);
  72. return;
  73. }
  74. err = skb_copy_bits(skb, rxm->offset + 2, &nonesp_marker,
  75. sizeof(nonesp_marker));
  76. if (err < 0) {
  77. XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
  78. kfree_skb(skb);
  79. return;
  80. }
  81. /* remove header, leave non-ESP marker/SPI */
  82. if (!pskb_pull(skb, rxm->offset + 2)) {
  83. XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR);
  84. kfree_skb(skb);
  85. return;
  86. }
  87. if (pskb_trim(skb, rxm->full_len - 2) != 0) {
  88. XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR);
  89. kfree_skb(skb);
  90. return;
  91. }
  92. if (nonesp_marker == 0)
  93. handle_nonesp(ctx, skb, strp->sk);
  94. else
  95. handle_esp(skb, strp->sk);
  96. }
  97. static int espintcp_parse(struct strparser *strp, struct sk_buff *skb)
  98. {
  99. struct strp_msg *rxm = strp_msg(skb);
  100. __be16 blen;
  101. u16 len;
  102. int err;
  103. if (skb->len < rxm->offset + 2)
  104. return 0;
  105. err = skb_copy_bits(skb, rxm->offset, &blen, sizeof(blen));
  106. if (err < 0)
  107. return err;
  108. len = be16_to_cpu(blen);
  109. if (len < 2)
  110. return -EINVAL;
  111. return len;
  112. }
  113. static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
  114. int flags, int *addr_len)
  115. {
  116. struct espintcp_ctx *ctx = espintcp_getctx(sk);
  117. struct sk_buff *skb;
  118. int err = 0;
  119. int copied;
  120. int off = 0;
  121. skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err);
  122. if (!skb) {
  123. if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN)
  124. return 0;
  125. return err;
  126. }
  127. copied = len;
  128. if (copied > skb->len)
  129. copied = skb->len;
  130. else if (copied < skb->len)
  131. msg->msg_flags |= MSG_TRUNC;
  132. err = skb_copy_datagram_msg(skb, 0, msg, copied);
  133. if (unlikely(err)) {
  134. kfree_skb(skb);
  135. return err;
  136. }
  137. if (flags & MSG_TRUNC)
  138. copied = skb->len;
  139. kfree_skb(skb);
  140. return copied;
  141. }
  142. int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
  143. {
  144. struct espintcp_ctx *ctx = espintcp_getctx(sk);
  145. if (skb_queue_len(&ctx->out_queue) >=
  146. READ_ONCE(net_hotdata.max_backlog)) {
  147. kfree_skb(skb);
  148. return -ENOBUFS;
  149. }
  150. __skb_queue_tail(&ctx->out_queue, skb);
  151. return 0;
  152. }
  153. EXPORT_SYMBOL_GPL(espintcp_queue_out);
  /*
   * Largest payload that fits in one espintcp message: the length field is
   * 2 bytes wide and its value includes the length field itself.
   */
  #define MAX_ESPINTCP_MSG (0xffff - 2)
  156. static int espintcp_sendskb_locked(struct sock *sk, struct espintcp_msg *emsg,
  157. int flags)
  158. {
  159. do {
  160. int ret;
  161. ret = skb_send_sock_locked(sk, emsg->skb,
  162. emsg->offset, emsg->len);
  163. if (ret < 0)
  164. return ret;
  165. emsg->len -= ret;
  166. emsg->offset += ret;
  167. } while (emsg->len > 0);
  168. kfree_skb(emsg->skb);
  169. memset(emsg, 0, sizeof(*emsg));
  170. return 0;
  171. }
  /*
   * Send the scatterlist held in emsg->skmsg through tcp_sendmsg_locked().
   *
   * Walks the entries starting at skmsg->sg.start, sending each one as a
   * single-page bio_vec with MSG_SPLICE_PAGES.  The first entry is resumed
   * at byte emsg->offset; later entries start from their beginning.
   *
   * Returns 0 once every entry has been sent (and @emsg has been zeroed),
   * or the negative value returned by tcp_sendmsg_locked().  On error the
   * progress made so far is recorded in emsg->offset and skmsg->sg.start.
   */
  172. static int espintcp_sendskmsg_locked(struct sock *sk,
  173. struct espintcp_msg *emsg, int flags)
  174. {
  /* MSG_MORE is set by default; it is cleared below for the last entry */
  175. struct msghdr msghdr = {
  176. .msg_flags = flags | MSG_SPLICE_PAGES | MSG_MORE,
  177. };
  178. struct sk_msg *skmsg = &emsg->skmsg;
  /* did the caller itself ask for MSG_MORE? */
  179. bool more = flags & MSG_MORE;
  180. struct scatterlist *sg;
  /* number of entries completely sent during this call */
  181. int done = 0;
  182. int ret;
  183. sg = &skmsg->sg.data[skmsg->sg.start];
  184. do {
  185. struct bio_vec bvec;
  /* bytes of this entry still to send, and where they start in the page */
  186. size_t size = sg->length - emsg->offset;
  187. int offset = sg->offset + emsg->offset;
  188. struct page *p;
  /* the saved offset only applies to the first entry processed */
  189. emsg->offset = 0;
  /* last entry and the caller did not request MSG_MORE: drop the flag */
  190. if (sg_is_last(sg) && !more)
  191. msghdr.msg_flags &= ~MSG_MORE;
  192. p = sg_page(sg);
  193. retry:
  194. bvec_set_page(&bvec, p, size, offset);
  195. iov_iter_bvec(&msghdr.msg_iter, ITER_SOURCE, &bvec, 1, size);
  196. ret = tcp_sendmsg_locked(sk, &msghdr, size);
  197. if (ret < 0) {
  /*
   * Record progress: bytes of the current entry already sent, and
   * how many whole entries were completed before it.
   */
  198. emsg->offset = offset - sg->offset;
  199. skmsg->sg.start += done;
  200. return ret;
  201. }
  /* short send: advance within the same entry and try again */
  202. if (ret != size) {
  203. offset += ret;
  204. size -= ret;
  205. goto retry;
  206. }
  /* entry fully sent: drop its page reference and memory charge */
  207. done++;
  208. put_page(p);
  209. sk_mem_uncharge(sk, sg->length);
  210. sg = sg_next(sg);
  211. } while (sg);
  212. memset(emsg, 0, sizeof(*emsg));
  213. return 0;
  214. }
  215. static int espintcp_push_msgs(struct sock *sk, int flags)
  216. {
  217. struct espintcp_ctx *ctx = espintcp_getctx(sk);
  218. struct espintcp_msg *emsg = &ctx->partial;
  219. int err;
  220. if (!emsg->len)
  221. return 0;
  222. if (ctx->tx_running)
  223. return -EAGAIN;
  224. ctx->tx_running = 1;
  225. if (emsg->skb)
  226. err = espintcp_sendskb_locked(sk, emsg, flags);
  227. else
  228. err = espintcp_sendskmsg_locked(sk, emsg, flags);
  229. if (err == -EAGAIN) {
  230. ctx->tx_running = 0;
  231. return flags & MSG_DONTWAIT ? -EAGAIN : 0;
  232. }
  233. if (!err)
  234. memset(emsg, 0, sizeof(*emsg));
  235. ctx->tx_running = 0;
  236. return err;
  237. }
  238. int espintcp_push_skb(struct sock *sk, struct sk_buff *skb)
  239. {
  240. struct espintcp_ctx *ctx = espintcp_getctx(sk);
  241. struct espintcp_msg *emsg = &ctx->partial;
  242. unsigned int len;
  243. int offset;
  244. if (sk->sk_state != TCP_ESTABLISHED) {
  245. kfree_skb(skb);
  246. return -ECONNRESET;
  247. }
  248. offset = skb_transport_offset(skb);
  249. len = skb->len - offset;
  250. espintcp_push_msgs(sk, 0);
  251. if (emsg->len) {
  252. kfree_skb(skb);
  253. return -ENOBUFS;
  254. }
  255. skb_set_owner_w(skb, sk);
  256. emsg->offset = offset;
  257. emsg->len = len;
  258. emsg->skb = skb;
  259. espintcp_push_msgs(sk, 0);
  260. return 0;
  261. }
  262. EXPORT_SYMBOL_GPL(espintcp_push_skb);
  263. static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
  264. {
  265. long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
  266. struct espintcp_ctx *ctx = espintcp_getctx(sk);
  267. struct espintcp_msg *emsg = &ctx->partial;
  268. struct iov_iter pfx_iter;
  269. struct kvec pfx_iov = {};
  270. size_t msglen = size + 2;
  271. char buf[2] = {0};
  272. int err, end;
  273. if (msg->msg_flags & ~MSG_DONTWAIT)
  274. return -EOPNOTSUPP;
  275. if (size > MAX_ESPINTCP_MSG)
  276. return -EMSGSIZE;
  277. if (msg->msg_controllen)
  278. return -EOPNOTSUPP;
  279. lock_sock(sk);
  280. err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT);
  281. if (err < 0) {
  282. if (err != -EAGAIN || !(msg->msg_flags & MSG_DONTWAIT))
  283. err = -ENOBUFS;
  284. goto unlock;
  285. }
  286. sk_msg_init(&emsg->skmsg);
  287. while (1) {
  288. /* only -ENOMEM is possible since we don't coalesce */
  289. err = sk_msg_alloc(sk, &emsg->skmsg, msglen, 0);
  290. if (!err)
  291. break;
  292. err = sk_stream_wait_memory(sk, &timeo);
  293. if (err)
  294. goto fail;
  295. }
  296. *((__be16 *)buf) = cpu_to_be16(msglen);
  297. pfx_iov.iov_base = buf;
  298. pfx_iov.iov_len = sizeof(buf);
  299. iov_iter_kvec(&pfx_iter, ITER_SOURCE, &pfx_iov, 1, pfx_iov.iov_len);
  300. err = sk_msg_memcopy_from_iter(sk, &pfx_iter, &emsg->skmsg,
  301. pfx_iov.iov_len);
  302. if (err < 0)
  303. goto fail;
  304. err = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, &emsg->skmsg, size);
  305. if (err < 0)
  306. goto fail;
  307. end = emsg->skmsg.sg.end;
  308. emsg->len = size;
  309. sk_msg_iter_var_prev(end);
  310. sg_mark_end(sk_msg_elem(&emsg->skmsg, end));
  311. tcp_rate_check_app_limited(sk);
  312. err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT);
  313. /* this message could be partially sent, keep it */
  314. release_sock(sk);
  315. return size;
  316. fail:
  317. sk_msg_free(sk, &emsg->skmsg);
  318. memset(emsg, 0, sizeof(*emsg));
  319. unlock:
  320. release_sock(sk);
  321. return err;
  322. }
  /*
   * proto/proto_ops used for AF_INET sockets; filled in by espintcp_init()
   * from tcp_prot and inet_stream_ops.
   */
  323. static struct proto espintcp_prot __ro_after_init;
  324. static struct proto_ops espintcp_ops __ro_after_init;
  /*
   * proto/proto_ops used for non-AF_INET sockets; built on first use in
   * espintcp_init_sk() from the socket's own proto/ops, with
   * tcpv6_prot_mutex held around that one-time build.
   */
  325. static struct proto espintcp6_prot;
  326. static struct proto_ops espintcp6_ops;
  327. static DEFINE_MUTEX(tcpv6_prot_mutex);
  328. static void espintcp_data_ready(struct sock *sk)
  329. {
  330. struct espintcp_ctx *ctx = espintcp_getctx(sk);
  331. trace_sk_data_ready(sk);
  332. strp_data_ready(&ctx->strp);
  333. }
  334. static void espintcp_tx_work(struct work_struct *work)
  335. {
  336. struct espintcp_ctx *ctx = container_of(work,
  337. struct espintcp_ctx, work);
  338. struct sock *sk = ctx->strp.sk;
  339. lock_sock(sk);
  340. if (!ctx->tx_running)
  341. espintcp_push_msgs(sk, 0);
  342. release_sock(sk);
  343. }
  344. static void espintcp_write_space(struct sock *sk)
  345. {
  346. struct espintcp_ctx *ctx = espintcp_getctx(sk);
  347. schedule_work(&ctx->work);
  348. ctx->saved_write_space(sk);
  349. }
  350. static void espintcp_destruct(struct sock *sk)
  351. {
  352. struct espintcp_ctx *ctx = espintcp_getctx(sk);
  353. ctx->saved_destruct(sk);
  354. kfree(ctx);
  355. }
  356. bool tcp_is_ulp_esp(struct sock *sk)
  357. {
  358. return sk->sk_prot == &espintcp_prot || sk->sk_prot == &espintcp6_prot;
  359. }
  360. EXPORT_SYMBOL_GPL(tcp_is_ulp_esp);
  /* forward declaration: build_protos() is defined further down the file */
  361. static void build_protos(struct proto *espintcp_prot,
  362. struct proto_ops *espintcp_ops,
  363. const struct proto *orig_prot,
  364. const struct proto_ops *orig_ops);
  /*
   * ULP init hook (registered as espintcp_ulp.init): attach espintcp to @sk.
   *
   * Allocates the per-socket context, sets up its strparser with
   * espintcp_rcv()/espintcp_parse(), switches the socket to the espintcp
   * proto/proto_ops, saves and replaces the data_ready, write_space and
   * destruct callbacks, and publishes the context in icsk_ulp_data.
   *
   * Returns 0 on success, -EBUSY if sk_user_data is already set, -ENOMEM if
   * the context cannot be allocated, or the error from strp_init().
   */
  365. static int espintcp_init_sk(struct sock *sk)
  366. {
  367. struct inet_connection_sock *icsk = inet_csk(sk);
  368. struct strp_callbacks cb = {
  369. .rcv_msg = espintcp_rcv,
  370. .parse_msg = espintcp_parse,
  371. };
  372. struct espintcp_ctx *ctx;
  373. int err;
  374. /* sockmap is not compatible with espintcp */
  375. if (sk->sk_user_data)
  376. return -EBUSY;
  377. ctx = kzalloc_obj(*ctx);
  378. if (!ctx)
  379. return -ENOMEM;
  380. err = strp_init(&ctx->strp, sk, &cb);
  381. if (err)
  382. goto free;
  383. __sk_dst_reset(sk);
  384. strp_check_rcv(&ctx->strp);
  385. skb_queue_head_init(&ctx->ike_queue);
  386. skb_queue_head_init(&ctx->out_queue);
  387. if (sk->sk_family == AF_INET) {
  388. sk->sk_prot = &espintcp_prot;
  389. sk->sk_socket->ops = &espintcp_ops;
  390. } else {
  /*
   * Build the non-AF_INET proto/ops once, from this socket's current
   * proto/ops; a non-NULL recvmsg marks them as already built.
   */
  391. mutex_lock(&tcpv6_prot_mutex);
  392. if (!espintcp6_prot.recvmsg)
  393. build_protos(&espintcp6_prot, &espintcp6_ops, sk->sk_prot, sk->sk_socket->ops);
  394. mutex_unlock(&tcpv6_prot_mutex);
  395. sk->sk_prot = &espintcp6_prot;
  396. sk->sk_socket->ops = &espintcp6_ops;
  397. }
  /* remember the original callbacks so the replacements can chain to them */
  398. ctx->saved_data_ready = sk->sk_data_ready;
  399. ctx->saved_write_space = sk->sk_write_space;
  400. ctx->saved_destruct = sk->sk_destruct;
  401. sk->sk_data_ready = espintcp_data_ready;
  402. sk->sk_write_space = espintcp_write_space;
  403. sk->sk_destruct = espintcp_destruct;
  404. rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
  405. INIT_WORK(&ctx->work, espintcp_tx_work);
  406. /* avoid using task_frag */
  407. sk->sk_allocation = GFP_ATOMIC;
  408. sk->sk_use_task_frag = false;
  409. return 0;
  /* error path: only the context allocation needs undoing */
  410. free:
  411. kfree(ctx);
  412. return err;
  413. }
  414. static void espintcp_release(struct sock *sk)
  415. {
  416. struct espintcp_ctx *ctx = espintcp_getctx(sk);
  417. struct sk_buff_head queue;
  418. struct sk_buff *skb;
  419. __skb_queue_head_init(&queue);
  420. skb_queue_splice_init(&ctx->out_queue, &queue);
  421. while ((skb = __skb_dequeue(&queue)))
  422. espintcp_push_skb(sk, skb);
  423. tcp_release_cb(sk);
  424. }
  425. static void espintcp_close(struct sock *sk, long timeout)
  426. {
  427. struct espintcp_ctx *ctx = espintcp_getctx(sk);
  428. struct espintcp_msg *emsg = &ctx->partial;
  429. strp_stop(&ctx->strp);
  430. sk->sk_prot = &tcp_prot;
  431. barrier();
  432. disable_work_sync(&ctx->work);
  433. strp_done(&ctx->strp);
  434. skb_queue_purge(&ctx->out_queue);
  435. skb_queue_purge(&ctx->ike_queue);
  436. if (emsg->len) {
  437. if (emsg->skb)
  438. kfree_skb(emsg->skb);
  439. else
  440. sk_msg_free(sk, &emsg->skmsg);
  441. }
  442. tcp_close(sk, timeout);
  443. }
  444. static __poll_t espintcp_poll(struct file *file, struct socket *sock,
  445. poll_table *wait)
  446. {
  447. struct sock *sk = sock->sk;
  448. struct espintcp_ctx *ctx = espintcp_getctx(sk);
  449. return datagram_poll_queue(file, sock, wait, &ctx->ike_queue);
  450. }
  451. static void build_protos(struct proto *espintcp_prot,
  452. struct proto_ops *espintcp_ops,
  453. const struct proto *orig_prot,
  454. const struct proto_ops *orig_ops)
  455. {
  456. memcpy(espintcp_prot, orig_prot, sizeof(struct proto));
  457. memcpy(espintcp_ops, orig_ops, sizeof(struct proto_ops));
  458. espintcp_prot->sendmsg = espintcp_sendmsg;
  459. espintcp_prot->recvmsg = espintcp_recvmsg;
  460. espintcp_prot->close = espintcp_close;
  461. espintcp_prot->release_cb = espintcp_release;
  462. espintcp_ops->poll = espintcp_poll;
  463. }
  464. static struct tcp_ulp_ops espintcp_ulp __read_mostly = {
  465. .name = "espintcp",
  466. .owner = THIS_MODULE,
  467. .init = espintcp_init_sk,
  468. };
  469. void __init espintcp_init(void)
  470. {
  471. build_protos(&espintcp_prot, &espintcp_ops, &tcp_prot, &inet_stream_ops);
  472. tcp_register_ulp(&espintcp_ulp);
  473. }