request_sock.h 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. /* SPDX-License-Identifier: GPL-2.0-or-later */
  2. /*
  3. * NET Generic infrastructure for Network protocols.
  4. *
  5. * Definitions for request_sock
  6. *
  7. * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  8. *
  9. * From code originally in include/net/tcp.h
  10. */
  11. #ifndef _REQUEST_SOCK_H
  12. #define _REQUEST_SOCK_H
  13. #include <linux/slab.h>
  14. #include <linux/spinlock.h>
  15. #include <linux/types.h>
  16. #include <linux/bug.h>
  17. #include <linux/refcount.h>
  18. #include <net/sock.h>
  19. #include <net/rstreason.h>
  20. struct request_sock;
  21. struct sk_buff;
  22. struct dst_entry;
  23. struct proto;
  /**
   * struct request_sock_ops - table of operations attached to a request_sock
   * @family: integer family identifier (presumably an address family such
   *          as AF_INET/AF_INET6 -- confirm against the tables that fill
   *          this in)
   * @obj_size: size value associated with the objects of this kind
   *            (presumably the allocation size used for @slab -- confirm)
   * @slab: kmem cache pointer for this kind of request sock
   * @slab_name: name string that goes with @slab
   * @send_ack: callback taking the socket, an skb and the request
   * @send_reset: callback taking the socket, an skb and the reason code
   *              (enum sk_rst_reason)
   * @destructor: callback invoked on a request (presumably when it is
   *              being torn down -- the call site is not in this header)
   *
   * struct request_sock points at one of these through its rsk_ops member.
   */
  struct request_sock_ops {
          int                     family;
          unsigned int            obj_size;
          struct kmem_cache       *slab;
          char                    *slab_name;

          void (*send_ack)(const struct sock *sk, struct sk_buff *skb,
                           struct request_sock *req);
          void (*send_reset)(const struct sock *sk, struct sk_buff *skb,
                             enum sk_rst_reason reason);
          void (*destructor)(struct request_sock *req);
  };
  36. struct saved_syn {
  37. u32 mac_hdrlen;
  38. u32 network_hdrlen;
  39. u32 tcp_hdrlen;
  40. u8 data[];
  41. };
  42. /* struct request_sock - mini sock to represent a connection request
  43. */
  44. struct request_sock {
  45. struct sock_common __req_common;
  46. #define rsk_refcnt __req_common.skc_refcnt
  47. #define rsk_hash __req_common.skc_hash
  48. #define rsk_listener __req_common.skc_listener
  49. #define rsk_window_clamp __req_common.skc_window_clamp
  50. #define rsk_rcv_wnd __req_common.skc_rcv_wnd
  51. struct request_sock *dl_next;
  52. u16 mss;
  53. u8 num_retrans; /* number of retransmits */
  54. u8 syncookie:1; /* True if
  55. * 1) tcpopts needs to be encoded in
  56. * TS of SYN+ACK
  57. * 2) ACK is validated by BPF kfunc.
  58. */
  59. u8 num_timeout:7; /* number of timeouts */
  60. u32 ts_recent;
  61. struct timer_list rsk_timer;
  62. const struct request_sock_ops *rsk_ops;
  63. struct sock *sk;
  64. struct saved_syn *saved_syn;
  65. u32 secid;
  66. u32 peer_secid;
  67. u32 timeout;
  68. };
  /*
   * inet_reqsk - view a struct sock pointer as a struct request_sock pointer.
   *
   * This is a plain pointer cast: nothing is checked or converted, and the
   * const qualifier of @sk is dropped by the cast.
   */
  static inline struct request_sock *inet_reqsk(const struct sock *sk)
  {
          struct request_sock *req = (struct request_sock *)sk;

          return req;
  }
  /*
   * req_to_sk - view a struct request_sock pointer as a struct sock pointer.
   *
   * Counterpart of inet_reqsk(); again only a pointer cast.
   */
  static inline struct sock *req_to_sk(struct request_sock *req)
  {
          struct sock *sk = (struct sock *)req;

          return sk;
  }
  77. /**
  78. * skb_steal_sock - steal a socket from an sk_buff
  79. * @skb: sk_buff to steal the socket from
  80. * @refcounted: is set to true if the socket is reference-counted
  81. * @prefetched: is set to true if the socket was assigned from bpf
  82. */
  83. static inline struct sock *skb_steal_sock(struct sk_buff *skb,
  84. bool *refcounted, bool *prefetched)
  85. {
  86. struct sock *sk = skb->sk;
  87. if (!sk) {
  88. *prefetched = false;
  89. *refcounted = false;
  90. return NULL;
  91. }
  92. *prefetched = skb_sk_is_prefetched(skb);
  93. if (*prefetched) {
  94. #if IS_ENABLED(CONFIG_SYN_COOKIES)
  95. if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) {
  96. struct request_sock *req = inet_reqsk(sk);
  97. *refcounted = false;
  98. sk = req->rsk_listener;
  99. req->rsk_listener = NULL;
  100. return sk;
  101. }
  102. #endif
  103. *refcounted = sk_is_refcounted(sk);
  104. } else {
  105. *refcounted = true;
  106. }
  107. skb->destructor = NULL;
  108. skb->sk = NULL;
  109. return sk;
  110. }
  111. void __reqsk_free(struct request_sock *req);
  112. static inline void reqsk_free(struct request_sock *req)
  113. {
  114. DEBUG_NET_WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);
  115. __reqsk_free(req);
  116. }
  117. static inline void reqsk_put(struct request_sock *req)
  118. {
  119. if (refcount_dec_and_test(&req->rsk_refcnt))
  120. __reqsk_free(req);
  121. }
  122. /*
  123. * For a TCP Fast Open listener -
  124. * lock - protects the access to all the reqsk, which is co-owned by
  125. * the listener and the child socket.
  126. * qlen - pending TFO requests (still in TCP_SYN_RECV).
  127. * max_qlen - max TFO reqs allowed before TFO is disabled.
  128. *
  129. * XXX (TFO) - ideally these fields can be made as part of "listen_sock"
  130. * structure above. But there is some implementation difficulty due to
  131. * listen_sock being part of request_sock_queue hence will be freed when
  132. * a listener is stopped. But TFO related fields may continue to be
  133. * accessed even after a listener is closed, until its sk_refcnt drops
  134. * to 0 implying no more outstanding TFO reqs. One solution is to keep
  135. * listen_opt around until sk_refcnt drops to 0. But there is some other
  136. * complexity that needs to be resolved. E.g., a listener can be disabled
  137. * temporarily through shutdown()->tcp_disconnect(), and re-enabled later.
  138. */
  139. struct fastopen_queue {
  140. struct request_sock *rskq_rst_head; /* Keep track of past TFO */
  141. struct request_sock *rskq_rst_tail; /* requests that caused RST.
  142. * This is part of the defense
  143. * against spoofing attack.
  144. */
  145. spinlock_t lock;
  146. int qlen; /* # of pending (TCP_SYN_RECV) reqs */
  147. int max_qlen; /* != 0 iff TFO is currently enabled */
  148. struct tcp_fastopen_context __rcu *ctx; /* cipher context for cookie */
  149. };
  150. /** struct request_sock_queue - queue of request_socks
  151. *
  152. * @rskq_accept_head - FIFO head of established children
  153. * @rskq_accept_tail - FIFO tail of established children
  154. * @rskq_defer_accept - User waits for some data after accept()
  155. *
  156. */
  157. struct request_sock_queue {
  158. spinlock_t rskq_lock;
  159. u8 rskq_defer_accept;
  160. u8 synflood_warned;
  161. atomic_t qlen;
  162. atomic_t young;
  163. struct request_sock *rskq_accept_head;
  164. struct request_sock *rskq_accept_tail;
  165. struct fastopen_queue fastopenq; /* Check max_qlen != 0 to determine
  166. * if TFO is enabled.
  167. */
  168. };
  169. void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
  170. bool reset);
  171. static inline bool reqsk_queue_empty(const struct request_sock_queue *queue)
  172. {
  173. return READ_ONCE(queue->rskq_accept_head) == NULL;
  174. }
  175. static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue,
  176. struct sock *parent)
  177. {
  178. struct request_sock *req;
  179. spin_lock_bh(&queue->rskq_lock);
  180. req = queue->rskq_accept_head;
  181. if (req) {
  182. sk_acceptq_removed(parent);
  183. WRITE_ONCE(queue->rskq_accept_head, req->dl_next);
  184. if (queue->rskq_accept_head == NULL)
  185. queue->rskq_accept_tail = NULL;
  186. }
  187. spin_unlock_bh(&queue->rskq_lock);
  188. return req;
  189. }
  190. static inline void reqsk_queue_removed(struct request_sock_queue *queue,
  191. const struct request_sock *req)
  192. {
  193. if (req->num_timeout == 0)
  194. atomic_dec(&queue->young);
  195. atomic_dec(&queue->qlen);
  196. }
  197. static inline void reqsk_queue_added(struct request_sock_queue *queue)
  198. {
  199. atomic_inc(&queue->young);
  200. atomic_inc(&queue->qlen);
  201. }
  202. static inline int reqsk_queue_len(const struct request_sock_queue *queue)
  203. {
  204. return atomic_read(&queue->qlen);
  205. }
  206. static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
  207. {
  208. return atomic_read(&queue->young);
  209. }
  210. /* RFC 7323 2.3 Using the Window Scale Option
  211. * The window field (SEG.WND) of every outgoing segment, with the
  212. * exception of <SYN> segments, MUST be right-shifted by
  213. * Rcv.Wind.Shift bits.
  214. *
  215. * This means the SEG.WND carried in SYNACK can not exceed 65535.
  216. * We use this property to harden TCP stack while in NEW_SYN_RECV state.
  217. */
  218. static inline u32 tcp_synack_window(const struct request_sock *req)
  219. {
  220. return min(req->rsk_rcv_wnd, 65535U);
  221. }
  222. #endif /* _REQUEST_SOCK_H */